zhiminy committed
Commit 4b09f5e · 1 Parent(s): cfe8a41
Files changed (2):
  1. app.py +163 -4
  2. msr.py +226 -0
app.py CHANGED
@@ -938,6 +938,48 @@ def get_hf_token():
     return token


+def load_cached_leaderboard_and_metrics():
+    """
+    Load cached leaderboard and monthly metrics data from SWE-Arena/swe_leaderboards dataset.
+    This is much faster than constructing from scratch on every app launch.
+
+    Returns:
+        dict: {
+            'leaderboard': dict of agent stats,
+            'monthly_metrics': dict with agents, months, and data,
+            'metadata': dict with last_updated, time_frame_days, total_agents
+        }
+        Returns None if cache doesn't exist or fails to load.
+    """
+    try:
+        token = get_hf_token()
+
+        print("📥 Loading cached leaderboard and metrics from HuggingFace...")
+
+        # Download cached file
+        cached_path = hf_hub_download(
+            repo_id="SWE-Arena/swe_leaderboards",
+            filename="swe-issue.json",
+            repo_type="dataset",
+            token=token
+        )
+
+        # Load JSON data
+        with open(cached_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+
+        print(f" ✓ Loaded cached data (last updated: {data.get('metadata', {}).get('last_updated', 'Unknown')})")
+        print(f" ✓ Leaderboard entries: {len(data.get('leaderboard', {}))}")
+        print(f" ✓ Monthly metrics for: {len(data.get('monthly_metrics', {}).get('agents', []))} agents")
+
+        return data
+
+    except Exception as e:
+        print(f"⚠️ Could not load cached data: {str(e)}")
+        print(f" Falling back to constructing from issue metadata...")
+        return None
+
+
 def upload_with_retry(api, path_or_fileobj, path_in_repo, repo_id, repo_type, token, max_retries=5):
     """
     Upload file to HuggingFace with exponential backoff retry logic.
@@ -1025,6 +1067,78 @@ def save_agent_to_hf(data):
 # DATA MANAGEMENT
 # =============================================================================

+def save_leaderboard_and_metrics_to_hf():
+    """
+    Save leaderboard data and monthly metrics to SWE-Arena/swe_leaderboards dataset.
+    Creates a comprehensive JSON file with both leaderboard stats and monthly metrics.
+    If the file exists, it will be overwritten.
+
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    import io
+
+    try:
+        token = get_hf_token()
+        if not token:
+            raise Exception("No HuggingFace token found")
+
+        api = HfApi(token=token)
+
+        print(f"\n{'='*80}")
+        print(f"📊 Preparing leaderboard and metrics data for upload...")
+        print(f"{'='*80}\n")
+
+        # Get leaderboard data
+        print(" Constructing leaderboard data...")
+        leaderboard_data = construct_leaderboard_from_metadata()
+
+        # Get monthly metrics data (all agents, not just top N)
+        print(" Calculating monthly metrics...")
+        monthly_metrics = calculate_monthly_metrics_by_agent(top_n=None)
+
+        # Combine into a single structure
+        combined_data = {
+            "leaderboard": leaderboard_data,
+            "monthly_metrics": monthly_metrics,
+            "metadata": {
+                "last_updated": datetime.now(timezone.utc).isoformat(),
+                "time_frame_days": LEADERBOARD_TIME_FRAME_DAYS,
+                "total_agents": len(leaderboard_data)
+            }
+        }
+
+        print(f" Leaderboard entries: {len(leaderboard_data)}")
+        print(f" Monthly metrics for: {len(monthly_metrics['agents'])} agents")
+        print(f" Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days")
+
+        # Convert to JSON and create file-like object
+        json_content = json.dumps(combined_data, indent=2)
+        file_like_object = io.BytesIO(json_content.encode('utf-8'))
+
+        # Upload to HuggingFace (will overwrite if exists)
+        print(f"\n🤗 Uploading to SWE-Arena/swe_leaderboards...")
+        api.upload_file(
+            path_or_fileobj=file_like_object,
+            path_in_repo="swe-issue.json",
+            repo_id="SWE-Arena/swe_leaderboards",
+            repo_type="dataset",
+            token=token,
+            commit_message=f"Update leaderboard data - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC"
+        )
+
+        print(f" ✓ Successfully uploaded swe-issue.json")
+        print(f"{'='*80}\n")
+
+        return True
+
+    except Exception as e:
+        print(f"✗ Error saving leaderboard and metrics: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
 def mine_all_agents():
     """
     Mine issue metadata for all agents within UPDATE_TIME_FRAME_DAYS and save to HuggingFace.
@@ -1122,6 +1236,13 @@ def mine_all_agents():
     print(f" BigQuery queries executed: 1")
     print(f"{'='*80}\n")

+    # After mining is complete, save leaderboard and metrics to HuggingFace
+    print(f"📤 Uploading leaderboard and metrics data...")
+    if save_leaderboard_and_metrics_to_hf():
+        print(f"✓ Leaderboard and metrics successfully uploaded to SWE-Arena/swe_leaderboards")
+    else:
+        print(f"⚠️ Failed to upload leaderboard and metrics data")
+

 def construct_leaderboard_from_metadata():
     """
@@ -1183,7 +1304,37 @@ def create_monthly_metrics_plot():
     Each agent gets a unique color for both their line and bars.
     Shows only top 5 agents by total issue count.
     """
-    metrics = calculate_monthly_metrics_by_agent(top_n=5)
+    # Try to load from cache first
+    cached_data = load_cached_leaderboard_and_metrics()
+
+    if cached_data and 'monthly_metrics' in cached_data:
+        # Use cached monthly metrics
+        all_metrics = cached_data['monthly_metrics']
+
+        # Filter to top 5 agents by total issue count
+        if all_metrics.get('agents') and all_metrics.get('data'):
+            # Calculate total issues for each agent
+            agent_totals = []
+            for agent_name in all_metrics['agents']:
+                total_issues = sum(all_metrics['data'][agent_name]['total_issues'])
+                agent_totals.append((agent_name, total_issues))
+
+            # Sort and take top 5
+            agent_totals.sort(key=lambda x: x[1], reverse=True)
+            top_agents = [agent_name for agent_name, _ in agent_totals[:5]]
+
+            # Filter metrics to only include top agents
+            metrics = {
+                'agents': top_agents,
+                'months': all_metrics['months'],
+                'data': {agent: all_metrics['data'][agent] for agent in top_agents if agent in all_metrics['data']}
+            }
+        else:
+            metrics = all_metrics
+    else:
+        # Fallback: Calculate from issue metadata
+        print(" Calculating monthly metrics from issue metadata...")
+        metrics = calculate_monthly_metrics_by_agent(top_n=5)

     if not metrics['agents'] or not metrics['months']:
         # Return an empty figure with a message
@@ -1292,11 +1443,19 @@ def create_monthly_metrics_plot():

 def get_leaderboard_dataframe():
     """
-    Construct leaderboard from issue metadata and convert to pandas DataFrame for display.
+    Load leaderboard from cached data and convert to pandas DataFrame for display.
+    Falls back to constructing from issue metadata if cache is unavailable.
     Returns formatted DataFrame sorted by total issues.
     """
-    # Construct leaderboard from metadata
-    cache_dict = construct_leaderboard_from_metadata()
+    # Try to load from cache first
+    cached_data = load_cached_leaderboard_and_metrics()
+
+    if cached_data and 'leaderboard' in cached_data:
+        cache_dict = cached_data['leaderboard']
+    else:
+        # Fallback: Construct leaderboard from metadata
+        print(" Constructing leaderboard from issue metadata...")
+        cache_dict = construct_leaderboard_from_metadata()

     if not cache_dict:
         # Return empty DataFrame with correct columns if no data
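
For reference, here is a minimal sketch of the swe-issue.json payload that save_leaderboard_and_metrics_to_hf() uploads and load_cached_leaderboard_and_metrics() reads back. The top-level keys mirror the combined_data structure in the diff above; the per-agent fields follow the stats built in msr.py (next file). The agent name, website, and all numbers are invented for illustration.

# Illustrative swe-issue.json contents (field names from this commit, values invented)
example_cache = {
    "leaderboard": {
        "example-bot[bot]": {                   # keyed by github_identifier
            "agent_name": "Example Bot",
            "website": "https://example.invalid",
            "github_identifier": "example-bot[bot]",
            "total_issues": 120,
            "closed_issues": 90,
            "resolved_issues": 72,
            "resolved_rate": 80.0,              # completed / closed * 100
        }
    },
    "monthly_metrics": {
        "agents": ["Example Bot"],
        "months": ["2025-01", "2025-02"],
        "data": {
            "Example Bot": {
                "resolved_rates": [75.0, 80.0],  # None for months with no closed issues
                "total_issues": [40, 80],
                "resolved_issues": [30, 42],
            }
        },
    },
    "metadata": {
        "last_updated": "2025-01-01T00:00:00+00:00",
        "time_frame_days": 3,
        "total_agents": 1,
    },
}
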
msr.py CHANGED
@@ -21,6 +21,7 @@ load_dotenv()

 AGENTS_REPO = "SWE-Arena/swe_agents"
 ISSUE_METADATA_REPO = "SWE-Arena/issue_metadata"
+LEADERBOARD_REPO = "SWE-Arena/swe_leaderboards"
 LEADERBOARD_TIME_FRAME_DAYS = 3 # Time frame for leaderboard

 # =============================================================================
@@ -464,6 +465,224 @@ def load_agents_from_hf():
     return []


+# =============================================================================
+# LEADERBOARD CALCULATION FUNCTIONS
+# =============================================================================
+
+def calculate_issue_stats_from_metadata(metadata_list):
+    """
+    Calculate statistics from a list of issue metadata.
+
+    Returns:
+        dict: Issue statistics including total, closed, resolved counts and rate
+    """
+    total_issues = len(metadata_list)
+
+    # Count closed issues (those with closed_at timestamp)
+    closed_issues = sum(1 for issue_meta in metadata_list
+                        if issue_meta.get('closed_at') is not None)
+
+    # Count completed issues (subset of closed issues with state_reason="completed")
+    completed = sum(1 for issue_meta in metadata_list
+                    if issue_meta.get('state_reason') == 'completed')
+
+    # Calculate resolved rate as: completed / closed (not completed / total)
+    resolved_rate = (completed / closed_issues * 100) if closed_issues > 0 else 0
+
+    return {
+        'total_issues': total_issues,
+        'closed_issues': closed_issues,
+        'resolved_issues': completed,
+        'resolved_rate': round(resolved_rate, 2),
+    }
+
+
+def calculate_monthly_metrics(all_metadata, agents):
+    """
+    Calculate monthly metrics for all agents for visualization.
+
+    Args:
+        all_metadata: Dictionary mapping agent_identifier to list of issue metadata
+        agents: List of agent dictionaries with metadata
+
+    Returns:
+        dict: {
+            'agents': list of agent names,
+            'months': list of month labels (e.g., '2025-01'),
+            'data': {
+                agent_name: {
+                    'resolved_rates': list of resolved rates by month,
+                    'total_issues': list of issue counts by month,
+                    'resolved_issues': list of resolved issue counts by month
+                }
+            }
+        }
+    """
+    # Create mapping from agent_identifier to agent_name
+    identifier_to_name = {
+        agent.get('github_identifier'): agent.get('name', agent.get('agent_name', agent.get('github_identifier')))
+        for agent in agents if agent.get('github_identifier')
+    }
+
+    # Group by agent and month
+    agent_month_data = defaultdict(lambda: defaultdict(list))
+
+    for identifier, metadata_list in all_metadata.items():
+        agent_name = identifier_to_name.get(identifier, identifier)
+
+        for issue_meta in metadata_list:
+            created_at = issue_meta.get('created_at')
+            if not created_at:
+                continue
+
+            try:
+                dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
+                month_key = f"{dt.year}-{dt.month:02d}"
+                agent_month_data[agent_name][month_key].append(issue_meta)
+            except Exception as e:
+                print(f"Warning: Could not parse date '{created_at}': {e}")
+                continue
+
+    # Get all unique months and sort them
+    all_months = set()
+    for agent_data in agent_month_data.values():
+        all_months.update(agent_data.keys())
+    months = sorted(list(all_months))
+
+    # Calculate metrics for each agent and month
+    result_data = {}
+    for agent_name, month_dict in agent_month_data.items():
+        resolved_rates = []
+        total_issues_list = []
+        resolved_issues_list = []
+
+        for month in months:
+            issues_in_month = month_dict.get(month, [])
+
+            # Count completed issues (those with state_reason="completed")
+            completed_count = sum(1 for issue in issues_in_month if issue.get('state_reason') == 'completed')
+
+            # Count closed issues (those with closed_at timestamp)
+            closed_count = sum(1 for issue in issues_in_month if issue.get('closed_at') is not None)
+
+            # Total issues created in this month
+            total_count = len(issues_in_month)
+
+            # Calculate resolved rate as: completed / closed (not completed / total)
+            resolved_rate = (completed_count / closed_count * 100) if closed_count > 0 else None
+
+            resolved_rates.append(resolved_rate)
+            total_issues_list.append(total_count)
+            resolved_issues_list.append(completed_count)
+
+        result_data[agent_name] = {
+            'resolved_rates': resolved_rates,
+            'total_issues': total_issues_list,
+            'resolved_issues': resolved_issues_list
+        }
+
+    agents_list = sorted(list(agent_month_data.keys()))
+
+    return {
+        'agents': agents_list,
+        'months': months,
+        'data': result_data
+    }
+
+
+def save_leaderboard_and_metrics_to_hf(all_metadata, agents):
+    """
+    Save leaderboard data and monthly metrics to SWE-Arena/swe_leaderboards dataset.
+    Creates a comprehensive JSON file with both leaderboard stats and monthly metrics.
+    If the file exists, it will be overwritten.
+
+    Args:
+        all_metadata: Dictionary mapping agent_identifier to list of issue metadata
+        agents: List of agent dictionaries with metadata
+
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    import io
+
+    try:
+        token = get_hf_token()
+        if not token:
+            raise Exception("No HuggingFace token found")
+
+        api = HfApi(token=token)
+
+        print(f"\n{'='*80}")
+        print(f"📊 Preparing leaderboard and metrics data for upload...")
+        print(f"{'='*80}\n")
+
+        # Build leaderboard data
+        print(" Constructing leaderboard data...")
+        leaderboard_data = {}
+
+        for agent in agents:
+            identifier = agent.get('github_identifier')
+            agent_name = agent.get('name', agent.get('agent_name', 'Unknown'))
+
+            if not identifier:
+                continue
+
+            metadata = all_metadata.get(identifier, [])
+            stats = calculate_issue_stats_from_metadata(metadata)
+
+            leaderboard_data[identifier] = {
+                'agent_name': agent_name,
+                'website': agent.get('website', 'N/A'),
+                'github_identifier': identifier,
+                **stats
+            }
+
+        # Get monthly metrics data
+        print(" Calculating monthly metrics...")
+        monthly_metrics = calculate_monthly_metrics(all_metadata, agents)
+
+        # Combine into a single structure
+        combined_data = {
+            "leaderboard": leaderboard_data,
+            "monthly_metrics": monthly_metrics,
+            "metadata": {
+                "last_updated": datetime.now(timezone.utc).isoformat(),
+                "time_frame_days": LEADERBOARD_TIME_FRAME_DAYS,
+                "total_agents": len(leaderboard_data)
+            }
+        }
+
+        print(f" Leaderboard entries: {len(leaderboard_data)}")
+        print(f" Monthly metrics for: {len(monthly_metrics['agents'])} agents")
+        print(f" Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days")
+
+        # Convert to JSON and create file-like object
+        json_content = json.dumps(combined_data, indent=2)
+        file_like_object = io.BytesIO(json_content.encode('utf-8'))
+
+        # Upload to HuggingFace (will overwrite if exists)
+        print(f"\n🤗 Uploading to {LEADERBOARD_REPO}...")
+        api.upload_file(
+            path_or_fileobj=file_like_object,
+            path_in_repo="swe-issue.json",
+            repo_id=LEADERBOARD_REPO,
+            repo_type="dataset",
+            token=token,
+            commit_message=f"Update leaderboard data - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC"
+        )
+
+        print(f" ✓ Successfully uploaded swe-issue.json")
+        print(f"{'='*80}\n")
+
+        return True
+
+    except Exception as e:
+        print(f"✗ Error saving leaderboard and metrics: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
 # =============================================================================
 # MAIN MINING FUNCTION
 # =============================================================================
@@ -562,6 +781,13 @@ def mine_all_agents():
     print(f" BigQuery queries executed: 1")
     print(f"{'='*80}\n")

+    # After mining is complete, save leaderboard and metrics to HuggingFace
+    print(f"📤 Uploading leaderboard and metrics data...")
+    if save_leaderboard_and_metrics_to_hf(all_metadata, agents):
+        print(f"✓ Leaderboard and metrics successfully uploaded to {LEADERBOARD_REPO}")
+    else:
+        print(f"⚠️ Failed to upload leaderboard and metrics data")
+

 # =============================================================================
 # ENTRY POINT
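
As a quick sanity check on the resolved-rate convention used by calculate_issue_stats_from_metadata() and calculate_monthly_metrics() above (completed / closed, not completed / total), here is a small self-contained sketch. The issue records are invented; only the field names (closed_at, state_reason) come from this commit.

# Hypothetical issue metadata records (values invented)
issues = [
    {"closed_at": "2025-01-10T12:00:00Z", "state_reason": "completed"},    # closed and resolved
    {"closed_at": "2025-01-11T09:00:00Z", "state_reason": "not_planned"},  # closed, not resolved
    {"closed_at": None, "state_reason": None},                             # still open
]

closed = sum(1 for i in issues if i.get("closed_at") is not None)           # 2
completed = sum(1 for i in issues if i.get("state_reason") == "completed")  # 1
resolved_rate = (completed / closed * 100) if closed > 0 else 0             # 50.0, not 33.3

print(closed, completed, round(resolved_rate, 2))  # -> 2 1 50.0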