zhimin-z committed on
Commit
3a48c45
·
1 Parent(s): f51e3c7
Files changed (2) hide show
  1. app.py +39 -192
  2. msr.py +4 -7
app.py CHANGED
@@ -339,25 +339,40 @@ def load_leaderboard_data_from_hf():
339
  # UI FUNCTIONS
340
  # =============================================================================
341
 
342
- def create_monthly_metrics_plot(top_n=5):
343
  """
344
- Create a Plotly figure with dual y-axes showing:
345
  - Left y-axis: Resolved Rate (%) as line curves
346
- - Right y-axis: Total Issues created as bar charts
347
 
348
  Each assistant gets a unique color for both their line and bars.
349
 
350
  Args:
 
351
  top_n: Number of top assistants to show (default: 5)
352
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  # Load from saved dataset
354
  saved_data = load_leaderboard_data_from_hf()
355
 
356
- if not saved_data or 'monthly_metrics' not in saved_data:
357
  # Return an empty figure with a message
358
  fig = go.Figure()
359
  fig.add_annotation(
360
- text="No data available for visualization",
361
  xref="paper", yref="paper",
362
  x=0.5, y=0.5, showarrow=False,
363
  font=dict(size=16)
@@ -369,19 +384,22 @@ def create_monthly_metrics_plot(top_n=5):
369
  )
370
  return fig
371
 
372
- metrics = saved_data['monthly_metrics']
373
- print(f"Loaded monthly metrics from saved dataset")
374
 
375
  # Apply top_n filter if specified
376
  if top_n is not None and top_n > 0 and metrics.get('assistants'):
377
- # Calculate total issues for each assistant
378
  agent_totals = []
379
  for agent_name in metrics['assistants']:
380
  agent_data = metrics['data'].get(agent_name, {})
381
- total_issues = sum(agent_data.get('total_issues', []))
382
- agent_totals.append((agent_name, total_issues))
 
 
 
383
 
384
- # Sort by total issues and take top N
385
  agent_totals.sort(key=lambda x: x[1], reverse=True)
386
  top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
387
 
@@ -396,7 +414,7 @@ def create_monthly_metrics_plot(top_n=5):
396
  # Return an empty figure with a message
397
  fig = go.Figure()
398
  fig.add_annotation(
399
- text="No data available for visualization",
400
  xref="paper", yref="paper",
401
  x=0.5, y=0.5, showarrow=False,
402
  font=dict(size=16)
@@ -456,12 +474,12 @@ def create_monthly_metrics_plot(top_n=5):
456
  secondary_y=False
457
  )
458
 
459
- # Add bar trace for total issues (right y-axis)
460
- # Only show bars for months where assistant has issues
461
  x_bars = []
462
  y_bars = []
463
- for month, count in zip(months, agent_data['total_issues']):
464
- if count > 0: # Only include months with issues
465
  x_bars.append(month)
466
  y_bars.append(count)
467
 
@@ -474,9 +492,9 @@ def create_monthly_metrics_plot(top_n=5):
474
  marker=dict(color=color, opacity=0.6),
475
  legendgroup=agent_name,
476
  showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
477
- hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
478
- 'Month: %{x}<br>' +
479
- 'Total Issues: %{y}<br>' +
480
  '<extra></extra>',
481
  offsetgroup=agent_name # Group bars by assistant for proper spacing
482
  ),
@@ -494,178 +512,7 @@ def create_monthly_metrics_plot(top_n=5):
494
  dtick=10,
495
  showgrid=True
496
  )
497
- fig.update_yaxes(title_text="<b>Total Issues</b>", secondary_y=True)
498
-
499
- # Update layout
500
- show_legend = (top_n is not None and top_n <= 10)
501
- fig.update_layout(
502
- title=None,
503
- hovermode='closest', # Show individual assistant info on hover
504
- barmode='group',
505
- height=600,
506
- showlegend=show_legend,
507
- margin=dict(l=50, r=150 if show_legend else 50, t=50, b=50) # More right margin when legend is shown
508
- )
509
-
510
- return fig
511
-
512
-
513
- def create_discussion_monthly_metrics_plot(top_n=5):
514
- """
515
- Create a Plotly figure with dual y-axes showing discussion metrics:
516
- - Left y-axis: Discussion Resolved Rate (%) as line curves
517
- - Right y-axis: Total Discussions created as bar charts
518
-
519
- Each assistant gets a unique color for both their line and bars.
520
-
521
- Args:
522
- top_n: Number of top assistants to show (default: 5)
523
- """
524
- # Load from saved dataset
525
- saved_data = load_leaderboard_data_from_hf()
526
-
527
- if not saved_data or 'discussion_monthly_metrics' not in saved_data:
528
- # Return an empty figure with a message
529
- fig = go.Figure()
530
- fig.add_annotation(
531
- text="No discussion data available for visualization",
532
- xref="paper", yref="paper",
533
- x=0.5, y=0.5, showarrow=False,
534
- font=dict(size=16)
535
- )
536
- fig.update_layout(
537
- title=None,
538
- xaxis_title=None,
539
- height=500
540
- )
541
- return fig
542
-
543
- metrics = saved_data['discussion_monthly_metrics']
544
- print(f"Loaded discussion monthly metrics from saved dataset")
545
-
546
- # Apply top_n filter if specified
547
- if top_n is not None and top_n > 0 and metrics.get('assistants'):
548
- # Calculate total discussions for each assistant
549
- agent_totals = []
550
- for agent_name in metrics['assistants']:
551
- agent_data = metrics['data'].get(agent_name, {})
552
- total_discussions = agent_data.get('total_discussions')
553
- agent_totals.append((agent_name, total_discussions))
554
-
555
- # Sort by total discussions and take top N
556
- agent_totals.sort(key=lambda x: x[1], reverse=True)
557
- top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
558
-
559
- # Filter metrics to only include top assistants
560
- metrics = {
561
- 'assistants': top_agents,
562
- 'months': metrics['months'],
563
- 'data': {assistant: metrics['data'][assistant] for assistant in top_agents if assistant in metrics['data']}
564
- }
565
-
566
- if not metrics['assistants'] or not metrics['months']:
567
- # Return an empty figure with a message
568
- fig = go.Figure()
569
- fig.add_annotation(
570
- text="No discussion data available for visualization",
571
- xref="paper", yref="paper",
572
- x=0.5, y=0.5, showarrow=False,
573
- font=dict(size=16)
574
- )
575
- fig.update_layout(
576
- title=None,
577
- xaxis_title=None,
578
- height=500
579
- )
580
- return fig
581
-
582
- # Create figure with secondary y-axis
583
- fig = make_subplots(specs=[[{"secondary_y": True}]])
584
-
585
- # Generate unique colors for many assistants using HSL color space
586
- def generate_color(index, total):
587
- """Generate distinct colors using HSL color space for better distribution"""
588
- hue = (index * 360 / total) % 360
589
- saturation = 70 + (index % 3) * 10 # Vary saturation slightly
590
- lightness = 45 + (index % 2) * 10 # Vary lightness slightly
591
- return f'hsl({hue}, {saturation}%, {lightness}%)'
592
-
593
- assistants = metrics['assistants']
594
- months = metrics['months']
595
- data = metrics['data']
596
-
597
- # Generate colors for all assistants
598
- agent_colors = {assistant: generate_color(idx, len(assistants)) for idx, assistant in enumerate(assistants)}
599
-
600
- # Add traces for each assistant
601
- for idx, agent_name in enumerate(assistants):
602
- color = agent_colors[agent_name]
603
- agent_data = data[agent_name]
604
-
605
- # Add line trace for resolved rate (left y-axis)
606
- resolved_rates = agent_data['resolved_rates']
607
- # Filter out None values for plotting
608
- x_resolved = [month for month, rate in zip(months, resolved_rates) if rate is not None]
609
- y_resolved = [rate for rate in resolved_rates if rate is not None]
610
-
611
- if x_resolved and y_resolved: # Only add trace if there's data
612
- fig.add_trace(
613
- go.Scatter(
614
- x=x_resolved,
615
- y=y_resolved,
616
- name=agent_name,
617
- mode='lines+markers',
618
- line=dict(color=color, width=2),
619
- marker=dict(size=8),
620
- legendgroup=agent_name,
621
- showlegend=(top_n is not None and top_n <= 10), # Show legend for top N assistants
622
- hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
623
- 'Month: %{x}<br>' +
624
- 'Discussion Resolved Rate: %{y:.2f}%<br>' +
625
- '<extra></extra>'
626
- ),
627
- secondary_y=False
628
- )
629
-
630
- # Add bar trace for total discussions (right y-axis)
631
- # Only show bars for months where assistant has discussions
632
- x_bars = []
633
- y_bars = []
634
- for month, count in zip(months, agent_data['total_discussions']):
635
- if count > 0: # Only include months with discussions
636
- x_bars.append(month)
637
- y_bars.append(count)
638
-
639
- if x_bars and y_bars: # Only add trace if there's data
640
- fig.add_trace(
641
- go.Bar(
642
- x=x_bars,
643
- y=y_bars,
644
- name=agent_name,
645
- marker=dict(color=color, opacity=0.6),
646
- legendgroup=agent_name,
647
- showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
648
- hovertemplate='<b>Assistant: %{fullData.name}</b><br>' +
649
- 'Month: %{x}<br>' +
650
- 'Total Discussions: %{y}<br>' +
651
- '<extra></extra>',
652
- offsetgroup=agent_name # Group bars by assistant for proper spacing
653
- ),
654
- secondary_y=True
655
- )
656
-
657
- # Update axes labels
658
- fig.update_xaxes(title_text=None)
659
- fig.update_yaxes(
660
- title_text="<b>Discussion Resolved Rate (%)</b>",
661
- range=[0, 100],
662
- secondary_y=False,
663
- showticklabels=True,
664
- tickmode='linear',
665
- dtick=10,
666
- showgrid=True
667
- )
668
- fig.update_yaxes(title_text="<b>Total Discussions</b>", secondary_y=True)
669
 
670
  # Update layout
671
  show_legend = (top_n is not None and top_n <= 10)
@@ -962,7 +809,7 @@ with gr.Blocks(title="SWE Assistant Issue & Discussion Leaderboard", theme=gr.th
962
 
963
  # Load discussion monthly metrics when app starts
964
  app.load(
965
- fn=lambda: create_discussion_monthly_metrics_plot(),
966
  inputs=[],
967
  outputs=[discussion_metrics_plot]
968
  )
 
339
  # UI FUNCTIONS
340
  # =============================================================================
341
 
342
+ def create_monthly_metrics_plot(type="issue", top_n=5):
343
  """
344
+ Create a Plotly figure with dual y-axes showing monthly metrics:
345
  - Left y-axis: Resolved Rate (%) as line curves
346
+ - Right y-axis: Total count (Issues or Discussions) as bar charts
347
 
348
  Each assistant gets a unique color for both their line and bars.
349
 
350
  Args:
351
+ type: Type of metrics to display - "issue" or "discussion" (default: "issue")
352
  top_n: Number of top assistants to show (default: 5)
353
  """
354
+ # Determine metrics key and field names based on type
355
+ if type == "discussion":
356
+ metrics_key = 'discussion_monthly_metrics'
357
+ total_field = 'total_discussions'
358
+ no_data_msg = "No discussion data available for visualization"
359
+ total_label = "Total Discussions"
360
+ print_msg = "discussion"
361
+ else: # default to "issue"
362
+ metrics_key = 'issue_monthly_metrics'
363
+ total_field = 'total_issues'
364
+ no_data_msg = "No data available for visualization"
365
+ total_label = "Total Issues"
366
+ print_msg = "issue"
367
+
368
  # Load from saved dataset
369
  saved_data = load_leaderboard_data_from_hf()
370
 
371
+ if not saved_data or metrics_key not in saved_data:
372
  # Return an empty figure with a message
373
  fig = go.Figure()
374
  fig.add_annotation(
375
+ text=no_data_msg,
376
  xref="paper", yref="paper",
377
  x=0.5, y=0.5, showarrow=False,
378
  font=dict(size=16)
 
384
  )
385
  return fig
386
 
387
+ metrics = saved_data[metrics_key]
388
+ print(f"Loaded {print_msg} monthly metrics from saved dataset")
389
 
390
  # Apply top_n filter if specified
391
  if top_n is not None and top_n > 0 and metrics.get('assistants'):
392
+ # Calculate total count for each assistant
393
  agent_totals = []
394
  for agent_name in metrics['assistants']:
395
  agent_data = metrics['data'].get(agent_name, {})
396
+ if type == "discussion":
397
+ total_count = agent_data.get(total_field, 0)
398
+ else:
399
+ total_count = sum(agent_data.get(total_field, []))
400
+ agent_totals.append((agent_name, total_count))
401
 
402
+ # Sort by total count and take top N
403
  agent_totals.sort(key=lambda x: x[1], reverse=True)
404
  top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
405
 
 
414
  # Return an empty figure with a message
415
  fig = go.Figure()
416
  fig.add_annotation(
417
+ text=no_data_msg,
418
  xref="paper", yref="paper",
419
  x=0.5, y=0.5, showarrow=False,
420
  font=dict(size=16)
 
474
  secondary_y=False
475
  )
476
 
477
+ # Add bar trace for total count (right y-axis)
478
+ # Only show bars for months where assistant has data
479
  x_bars = []
480
  y_bars = []
481
+ for month, count in zip(months, agent_data[total_field]):
482
+ if count > 0: # Only include months with data
483
  x_bars.append(month)
484
  y_bars.append(count)
485
 
 
492
  marker=dict(color=color, opacity=0.6),
493
  legendgroup=agent_name,
494
  showlegend=False, # Hide duplicate legend entry (already shown in Scatter)
495
+ hovertemplate=f'<b>Assistant: %{{fullData.name}}</b><br>' +
496
+ f'Month: %{{x}}<br>' +
497
+ f'{total_label}: %{{y}}<br>' +
498
  '<extra></extra>',
499
  offsetgroup=agent_name # Group bars by assistant for proper spacing
500
  ),
 
512
  dtick=10,
513
  showgrid=True
514
  )
515
+ fig.update_yaxes(title_text=f"<b>{total_label}</b>", secondary_y=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
 
517
  # Update layout
518
  show_legend = (top_n is not None and top_n <= 10)
 
809
 
810
  # Load discussion monthly metrics when app starts
811
  app.load(
812
+ fn=lambda: create_monthly_metrics_plot(type="discussion"),
813
  inputs=[],
814
  outputs=[discussion_metrics_plot]
815
  )
msr.py CHANGED
@@ -1198,7 +1198,7 @@ def construct_leaderboard_from_metadata(all_metadata_dict, assistants, wanted_re
1198
  return cache_dict
1199
 
1200
 
1201
- def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics, wanted_issues=None, discussion_monthly_metrics=None):
1202
  """Save leaderboard data, monthly metrics, wanted issues, and discussion metrics to HuggingFace dataset."""
1203
  try:
1204
  token = get_hf_token()
@@ -1210,9 +1210,6 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics, wanted_issues
1210
  if wanted_issues is None:
1211
  wanted_issues = []
1212
 
1213
- if discussion_monthly_metrics is None:
1214
- discussion_monthly_metrics = {'assistants': [], 'months': [], 'data': {}}
1215
-
1216
  combined_data = {
1217
  'metadata': {
1218
  'last_updated': datetime.now(timezone.utc).isoformat(),
@@ -1222,7 +1219,7 @@ def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics, wanted_issues
1222
  'patch_wanted_labels': PATCH_WANTED_LABELS
1223
  },
1224
  'leaderboard': leaderboard_dict,
1225
- 'monthly_metrics': monthly_metrics,
1226
  'wanted_issues': wanted_issues,
1227
  'discussion_monthly_metrics': discussion_monthly_metrics
1228
  }
@@ -1311,12 +1308,12 @@ def mine_all_agents():
1311
  leaderboard_dict = construct_leaderboard_from_metadata(
1312
  agent_issues, assistants, wanted_resolved, agent_discussions
1313
  )
1314
- monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
1315
  discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
1316
  agent_discussions, assistants
1317
  )
1318
  save_leaderboard_data_to_hf(
1319
- leaderboard_dict, monthly_metrics, wanted_open, discussion_monthly_metrics
1320
  )
1321
 
1322
  except Exception as e:
 
1198
  return cache_dict
1199
 
1200
 
1201
+ def save_leaderboard_data_to_hf(leaderboard_dict, issue_monthly_metrics, wanted_issues=None, discussion_monthly_metrics=None):
1202
  """Save leaderboard data, monthly metrics, wanted issues, and discussion metrics to HuggingFace dataset."""
1203
  try:
1204
  token = get_hf_token()
 
1210
  if wanted_issues is None:
1211
  wanted_issues = []
1212
 
 
 
 
1213
  combined_data = {
1214
  'metadata': {
1215
  'last_updated': datetime.now(timezone.utc).isoformat(),
 
1219
  'patch_wanted_labels': PATCH_WANTED_LABELS
1220
  },
1221
  'leaderboard': leaderboard_dict,
1222
+ 'issue_monthly_metrics': issue_monthly_metrics,
1223
  'wanted_issues': wanted_issues,
1224
  'discussion_monthly_metrics': discussion_monthly_metrics
1225
  }
 
1308
  leaderboard_dict = construct_leaderboard_from_metadata(
1309
  agent_issues, assistants, wanted_resolved, agent_discussions
1310
  )
1311
+ issue_monthly_metrics = calculate_monthly_metrics_by_agent(agent_issues, assistants)
1312
  discussion_monthly_metrics = calculate_monthly_metrics_by_agent_discussions(
1313
  agent_discussions, assistants
1314
  )
1315
  save_leaderboard_data_to_hf(
1316
+ leaderboard_dict, issue_monthly_metrics, wanted_open, discussion_monthly_metrics
1317
  )
1318
 
1319
  except Exception as e: