Spaces:

SWE-Arena
/

SWE-Issue

Running

App Files Community

zhimin-z committed 19 days ago

Commit

3a48c45

1 Parent(s): f51e3c7

rf

Browse files

Files changed (2) hide show

app.py +39 -192
msr.py +4 -7

app.py CHANGED Viewed

@@ -339,25 +339,40 @@ def load_leaderboard_data_from_hf():
 # UI FUNCTIONS
 # =============================================================================
-def create_monthly_metrics_plot(top_n=5):
     """
-    Create a Plotly figure with dual y-axes showing:
     - Left y-axis: Resolved Rate (%) as line curves
-    - Right y-axis: Total Issues created as bar charts
     Each assistant gets a unique color for both their line and bars.
     Args:
         top_n: Number of top assistants to show (default: 5)
     """
     # Load from saved dataset
     saved_data = load_leaderboard_data_from_hf()
-    if not saved_data or 'monthly_metrics' not in saved_data:
         # Return an empty figure with a message
         fig = go.Figure()
         fig.add_annotation(
-            text="No data available for visualization",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
             font=dict(size=16)
@@ -369,19 +384,22 @@ def create_monthly_metrics_plot(top_n=5):
         )
         return fig
-    metrics = saved_data['monthly_metrics']
-    print(f"Loaded monthly metrics from saved dataset")
     # Apply top_n filter if specified
     if top_n is not None and top_n > 0 and metrics.get('assistants'):
-        # Calculate total issues for each assistant
         agent_totals = []
         for agent_name in metrics['assistants']:
             agent_data = metrics['data'].get(agent_name, {})
-            total_issues = sum(agent_data.get('total_issues', []))
-            agent_totals.append((agent_name, total_issues))
-        # Sort by total issues and take top N
         agent_totals.sort(key=lambda x: x[1], reverse=True)
         top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
@@ -396,7 +414,7 @@ def create_monthly_metrics_plot(top_n=5):
         # Return an empty figure with a message
         fig = go.Figure()
         fig.add_annotation(
-            text="No data available for visualization",
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
             font=dict(size=16)
@@ -456,12 +474,12 @@ def create_monthly_metrics_plot(top_n=5):
                 secondary_y=False
             )
-        # Add bar trace for total issues (right y-axis)
-        # Only show bars for months where assistant has issues
         x_bars = []
         y_bars = []
-        for month, count in zip(months, agent_data['total_issues']):
-            if count > 0:  # Only include months with issues
                 x_bars.append(month)
                 y_bars.append(count)
@@ -474,9 +492,9 @@ def create_monthly_metrics_plot(top_n=5):
                     marker=dict(color=color, opacity=0.6),
                     legendgroup=agent_name,
                     showlegend=False,  # Hide duplicate legend entry (already shown in Scatter)
-                    hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
-                                 'Month: %{x}<br>' +
-                                 'Total Issues: %{y}<br>' +
                                  '<extra></extra>',
                     offsetgroup=agent_name  # Group bars by assistant for proper spacing
                 ),
@@ -494,178 +512,7 @@ def create_monthly_metrics_plot(top_n=5):
         dtick=10,
         showgrid=True
     )
-    fig.update_yaxes(title_text="<b>Total Issues</b>", secondary_y=True)
-    # Update layout
-    show_legend = (top_n is not None and top_n <= 10)
-    fig.update_layout(
-        title=None,
-        hovermode='closest',  # Show individual assistant info on hover
-        barmode='group',
-        height=600,
-        showlegend=show_legend,
-        margin=dict(l=50, r=150 if show_legend else 50, t=50, b=50)  # More right margin when legend is shown
-    )
-    return fig
-def create_discussion_monthly_metrics_plot(top_n=5):
-    """
-    Create a Plotly figure with dual y-axes showing discussion metrics:
-    - Left y-axis: Discussion Resolved Rate (%) as line curves
-    - Right y-axis: Total Discussions created as bar charts
-    Each assistant gets a unique color for both their line and bars.
-    Args:
-        top_n: Number of top assistants to show (default: 5)
-    """
-    # Load from saved dataset
-    saved_data = load_leaderboard_data_from_hf()
-    if not saved_data or 'discussion_monthly_metrics' not in saved_data:
-        # Return an empty figure with a message
-        fig = go.Figure()
-        fig.add_annotation(
-            text="No discussion data available for visualization",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
-        )
-        fig.update_layout(
-            title=None,
-            xaxis_title=None,
-            height=500
-        )
-        return fig
-    metrics = saved_data['discussion_monthly_metrics']
-    print(f"Loaded discussion monthly metrics from saved dataset")
-    # Apply top_n filter if specified
-    if top_n is not None and top_n > 0 and metrics.get('assistants'):
-        # Calculate total discussions for each assistant
-        agent_totals = []
-        for agent_name in metrics['assistants']:
-            agent_data = metrics['data'].get(agent_name, {})
-            total_discussions = agent_data.get('total_discussions')
-            agent_totals.append((agent_name, total_discussions))
-        # Sort by total discussions and take top N
-        agent_totals.sort(key=lambda x: x[1], reverse=True)
-        top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
-        # Filter metrics to only include top assistants
-        metrics = {
-            'assistants': top_agents,
-            'months': metrics['months'],
-            'data': {assistant: metrics['data'][assistant] for assistant in top_agents if assistant in metrics['data']}
-        }
-    if not metrics['assistants'] or not metrics['months']:
-        # Return an empty figure with a message
-        fig = go.Figure()
-        fig.add_annotation(
-            text="No discussion data available for visualization",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
-        )
-        fig.update_layout(
-            title=None,
-            xaxis_title=None,
-            height=500
-        )
-        return fig
-    # Create figure with secondary y-axis
-    fig = make_subplots(specs=[[{"secondary_y": True}]])
-    # Generate unique colors for many assistants using HSL color space
-    def generate_color(index, total):
-        """Generate distinct colors using HSL color space for better distribution"""
-        hue = (index * 360 / total) % 360
-        saturation = 70 + (index % 3) * 10  # Vary saturation slightly
-        lightness = 45 + (index % 2) * 10   # Vary lightness slightly
-        return f'hsl({hue}, {saturation}%, {lightness}%)'
-    assistants = metrics['assistants']
-    months = metrics['months']
-    data = metrics['data']
-    # Generate colors for all assistants
-    agent_colors = {assistant: generate_color(idx, len(assistants)) for idx, assistant in enumerate(assistants)}
-    # Add traces for each assistant
-    for idx, agent_name in enumerate(assistants):
-        color = agent_colors[agent_name]
-        agent_data = data[agent_name]
-        # Add line trace for resolved rate (left y-axis)
-        resolved_rates = agent_data['resolved_rates']
-        # Filter out None values for plotting
-        x_resolved = [month for month, rate in zip(months, resolved_rates) if rate is not None]
-        y_resolved = [rate for rate in resolved_rates if rate is not None]
-        if x_resolved and y_resolved:  # Only add trace if there's data
-            fig.add_trace(
-                go.Scatter(
-                    x=x_resolved,
-                    y=y_resolved,
-                    name=agent_name,
-                    mode='lines+markers',
-                    line=dict(color=color, width=2),
-                    marker=dict(size=8),
-                    legendgroup=agent_name,
-                    showlegend=(top_n is not None and top_n <= 10),  # Show legend for top N assistants
-                    hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
-                                 'Month: %{x}<br>' +
-                                 'Discussion Resolved Rate: %{y:.2f}%<br>' +
-                                 '<extra></extra>'
-                ),
-                secondary_y=False
-            )
-        # Add bar trace for total discussions (right y-axis)
-        # Only show bars for months where assistant has discussions
-        x_bars = []
-        y_bars = []
-        for month, count in zip(months, agent_data['total_discussions']):
-            if count > 0:  # Only include months with discussions
-                x_bars.append(month)
-                y_bars.append(count)
-        if x_bars and y_bars:  # Only add trace if there's data
-            fig.add_trace(
-                go.Bar(
-                    x=x_bars,
-                    y=y_bars,
-                    name=agent_name,
-                    marker=dict(color=color, opacity=0.6),
-                    legendgroup=agent_name,
-                    showlegend=False,  # Hide duplicate legend entry (already shown in Scatter)
-                    hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
-                                 'Month: %{x}<br>' +
-                                 'Total Discussions: %{y}<br>' +
-                                 '<extra></extra>',
-                    offsetgroup=agent_name  # Group bars by assistant for proper spacing
-                ),
-                secondary_y=True
-            )
-    # Update axes labels
-    fig.update_xaxes(title_text=None)
-    fig.update_yaxes(
-        title_text="<b>Discussion Resolved Rate (%)</b>",
-        range=[0, 100],
-        secondary_y=False,
-        showticklabels=True,
-        tickmode='linear',
-        dtick=10,
-        showgrid=True
-    )
-    fig.update_yaxes(title_text="<b>Total Discussions</b>", secondary_y=True)
     # Update layout
     show_legend = (top_n is not None and top_n <= 10)
@@ -962,7 +809,7 @@ with gr.Blocks(title="SWE Assistant Issue & Discussion Leaderboard", theme=gr.th
             # Load discussion monthly metrics when app starts
             app.load(
-                fn=lambda: create_discussion_monthly_metrics_plot(),
                 inputs=[],
                 outputs=[discussion_metrics_plot]
             )

 # UI FUNCTIONS
 # =============================================================================
+def create_monthly_metrics_plot(type="issue", top_n=5):
     """
+    Create a Plotly figure with dual y-axes showing monthly metrics:
     - Left y-axis: Resolved Rate (%) as line curves
+    - Right y-axis: Total count (Issues or Discussions) as bar charts
     Each assistant gets a unique color for both their line and bars.
     Args:
+        type: Type of metrics to display - "issue" or "discussion" (default: "issue")
         top_n: Number of top assistants to show (default: 5)
     """
+    # Determine metrics key and field names based on type
+    if type == "discussion":
+        metrics_key = 'discussion_monthly_metrics'
+        total_field = 'total_discussions'
+        no_data_msg = "No discussion data available for visualization"
+        total_label = "Total Discussions"
+        print_msg = "discussion"
+    else:  # default to "issue"
+        metrics_key = 'issue_monthly_metrics'
+        total_field = 'total_issues'
+        no_data_msg = "No data available for visualization"
+        total_label = "Total Issues"
+        print_msg = "issue"
     # Load from saved dataset
     saved_data = load_leaderboard_data_from_hf()
+    if not saved_data or metrics_key not in saved_data:
         # Return an empty figure with a message
         fig = go.Figure()
         fig.add_annotation(
+            text=no_data_msg,
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
             font=dict(size=16)
         )
         return fig
+    metrics = saved_data[metrics_key]
+    print(f"Loaded {print_msg} monthly metrics from saved dataset")
     # Apply top_n filter if specified
     if top_n is not None and top_n > 0 and metrics.get('assistants'):
+        # Calculate total count for each assistant
         agent_totals = []
         for agent_name in metrics['assistants']:
             agent_data = metrics['data'].get(agent_name, {})
+            if type == "discussion":
+                total_count = agent_data.get(total_field, 0)
+            else:
+                total_count = sum(agent_data.get(total_field, []))
+            agent_totals.append((agent_name, total_count))
+        # Sort by total count and take top N
         agent_totals.sort(key=lambda x: x[1], reverse=True)
         top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
         # Return an empty figure with a message
         fig = go.Figure()
         fig.add_annotation(
+            text=no_data_msg,
             xref="paper", yref="paper",
             x=0.5, y=0.5, showarrow=False,
             font=dict(size=16)
                 secondary_y=False
             )
+        # Add bar trace for total count (right y-axis)
+        # Only show bars for months where assistant has data
         x_bars = []
         y_bars = []
+        for month, count in zip(months, agent_data[total_field]):
+            if count > 0:  # Only include months with data
                 x_bars.append(month)
                 y_bars.append(count)
                     marker=dict(color=color, opacity=0.6),
                     legendgroup=agent_name,
                     showlegend=False,  # Hide duplicate legend entry (already shown in Scatter)
+                    hovertemplate=f'<b>Assistant: %{{fullData.name}}</b><br>' +
+                                 f'Month: %{{x}}<br>' +
+                                 f'{total_label}: %{{y}}<br>' +
                                  '<extra></extra>',
                     offsetgroup=agent_name  # Group bars by assistant for proper spacing
                 ),
         dtick=10,
         showgrid=True
     )
+    fig.update_yaxes(title_text=f"<b>{total_label}</b>", secondary_y=True)
     # Update layout
     show_legend = (top_n is not None and top_n <= 10)
             # Load discussion monthly metrics when app starts
             app.load(
+                fn=lambda: create_monthly_metrics_plot(type="discussion"),
                 inputs=[],
                 outputs=[discussion_metrics_plot]
             )

msr.py CHANGED Viewed

@@ -1198,7 +1198,7 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
     return cache_dict
-def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics, wanted_issues=None, discussion_monthly_metrics=None):
     """Save leaderboard data, monthly metrics, wanted issues, and discussion metrics to HuggingFace dataset."""
     try:
         token = get_hf_token()
@@ -1210,9 +1210,6 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics, wanted_issues
         if wanted_issues is None:
             wanted_issues = []
-        if discussion_monthly_metrics is None:
-            discussion_monthly_metrics = {'assistants': [], 'months': [], 'data': {}}
         combined_data = {
             'metadata': {
                 'last_updated': datetime.now(timezone.utc).isoformat(),
@@ -1222,7 +1219,7 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics, wanted_issues
                 'patch_wanted_labels': PATCH_WANTED_LABELS
             },
             'leaderboard': leaderboard_dict,
-            'monthly_metrics': monthly_metrics,
             'wanted_issues': wanted_issues,
             'discussion_monthly_metrics': discussion_monthly_metrics
         }
@@ -1311,12 +1308,12 @@ def mine_all_agents():
         leaderboard_dict = construct_leaderboard_from_metadata(
             agent_issues, assistants, wanted_resolved, agent_discussions
         )
-        monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
         discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
             agent_discussions, assistants
         )
         save_leaderboard_data_to_hf(
-            leaderboard_dict, monthly_metrics, wanted_open, discussion_monthly_metrics
         )
     except Exception as e:

     return cache_dict
+def save_leaderboard_data_to_hf(leaderboard_dict, issue_monthly_metrics, wanted_issues=None, discussion_monthly_metrics=None):
     """Save leaderboard data, monthly metrics, wanted issues, and discussion metrics to HuggingFace dataset."""
     try:
         token = get_hf_token()
         if wanted_issues is None:
             wanted_issues = []
         combined_data = {
             'metadata': {
                 'last_updated': datetime.now(timezone.utc).isoformat(),
                 'patch_wanted_labels': PATCH_WANTED_LABELS
             },
             'leaderboard': leaderboard_dict,
+            'issue_monthly_metrics': issue_monthly_metrics,
             'wanted_issues': wanted_issues,
             'discussion_monthly_metrics': discussion_monthly_metrics
         }
         leaderboard_dict = construct_leaderboard_from_metadata(
             agent_issues, assistants, wanted_resolved, agent_discussions
         )
+        issue_monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
         discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
             agent_discussions, assistants
         )
         save_leaderboard_data_to_hf(
+            leaderboard_dict, issue_monthly_metrics, wanted_open, discussion_monthly_metrics
         )
     except Exception as e: