zhimin-z committed
Commit 3da62f9 · 1 Parent(s): 5998589

Files changed (1):
  1. msr.py +11 -11

msr.py CHANGED
@@ -50,7 +50,7 @@ UPLOAD_DELAY_SECONDS = 5
 UPLOAD_MAX_BACKOFF = 3600
 
 # Scheduler configuration
-SCHEDULE_ENABLED = True
+SCHEDULE_ENABLED = False
 SCHEDULE_DAY_OF_WEEK = 'sun'  # Sunday
 SCHEDULE_HOUR = 0
 SCHEDULE_MINUTE = 0
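
Note: besides the escaping fixes below, this hunk disables the weekly scheduler by default. How the flag is consumed is outside this diff, so the following gating is a hypothetical sketch, not code from msr.py:

    # Hypothetical wiring; msr.py's actual entry point is not shown in this diff.
    if __name__ == '__main__':
        if SCHEDULE_ENABLED:
            setup_scheduler()    # start the weekly job and block
        else:
            mine_all_agents()    # one-shot immediate run
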
@@ -81,7 +81,7 @@ def save_jsonl(filename, data):
     """Save list of dictionaries to JSONL file."""
     with open(filename, 'w', encoding='utf-8') as f:
         for item in data:
-            f.write(json.dumps(item) + '\\n')
+            f.write(json.dumps(item) + '\n')
 
 
 def normalize_date_format(date_string):
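
Note: this hunk carries the substantive fix that the later hunks repeat. In Python source, '\\n' is a two-character string (a backslash followed by the letter n), not a newline, so the old save_jsonl wrote every record onto one long line separated by literal \n text, which is not valid JSONL. A self-contained demonstration:

    import json

    record = {"id": 1}
    buggy = json.dumps(record) + '\\n'  # appends backslash + 'n', no line break
    fixed = json.dumps(record) + '\n'   # appends a real newline
    print(repr(buggy))  # '{"id": 1}\\n'
    print(repr(fixed))  # '{"id": 1}\n'
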
@@ -448,7 +448,7 @@ def fetch_all_issue_metadata_streaming(conn, identifiers, start_date, end_date):
             print(f"✓ {batch_issues} issues found")
 
         except Exception as e:
-            print(f"\\n ✗ Batch {batch_num} error: {str(e)}")
+            print(f"\n ✗ Batch {batch_num} error: {str(e)}")
             import traceback
             traceback.print_exc()
@@ -457,7 +457,7 @@ def fetch_all_issue_metadata_streaming(conn, identifiers, start_date, end_date):
 
     # Final summary
     agents_with_data = sum(1 for issues in metadata_by_agent.values() if issues)
-    print(f"\\n ✓ Complete: {total_issues} issues found for {agents_with_data}/{len(identifiers)} agents")
+    print(f"\n ✓ Complete: {total_issues} issues found for {agents_with_data}/{len(identifiers)} agents")
 
     return dict(metadata_by_agent)
 
@@ -499,7 +499,7 @@ def sync_agents_repo():
         print(f" ✓ Repository synced successfully")
         if output:
             # Print first few lines of output
-            lines = output.split('\\n')[:5]
+            lines = output.split('\n')[:5]
             for line in lines:
                 print(f" {line}")
         return True
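
Note: the same escaping slip was quieter here: splitting on the literal two-character '\\n' rarely matches anything in real command output, so [:5] kept the whole output as a single "line". For example:

    output = "first\nsecond\nthird"
    print(output.split('\\n')[:5])  # ['first\nsecond\nthird'] -- one element
    print(output.split('\n')[:5])   # ['first', 'second', 'third']
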
@@ -736,12 +736,12 @@ def mine_all_agents():
     Mine issue metadata for all agents using STREAMING batch processing.
     Downloads GHArchive data, then uses BATCH-based DuckDB queries.
     """
-    print(f"\\n[1/4] Downloading GHArchive data...")
+    print(f"\n[1/4] Downloading GHArchive data...")
 
     if not download_all_gharchive_data():
         print("Warning: Download had errors, continuing with available data...")
 
-    print(f"\\n[2/4] Loading agent metadata...")
+    print(f"\n[2/4] Loading agent metadata...")
 
     agents = load_agents_from_hf()
     if not agents:
@@ -753,7 +753,7 @@ def mine_all_agents():
         print("Error: No valid agent identifiers found")
         return
 
-    print(f"\\n[3/4] Mining issue metadata ({len(identifiers)} agents, {LEADERBOARD_TIME_FRAME_DAYS} days)...")
+    print(f"\n[3/4] Mining issue metadata ({len(identifiers)} agents, {LEADERBOARD_TIME_FRAME_DAYS} days)...")
 
     try:
         conn = get_duckdb_connection()
@@ -779,7 +779,7 @@ def mine_all_agents():
     finally:
         conn.close()
 
-    print(f"\\n[4/4] Saving leaderboard...")
+    print(f"\n[4/4] Saving leaderboard...")
 
     try:
         leaderboard_dict = construct_leaderboard_from_metadata(all_metadata, agents)
@@ -825,9 +825,9 @@ def setup_scheduler():
     from datetime import datetime
     next_run = trigger.get_next_fire_time(None, datetime.now(trigger.timezone))
     print(f"Scheduler: Weekly on {SCHEDULE_DAY_OF_WEEK} at {SCHEDULE_HOUR:02d}:{SCHEDULE_MINUTE:02d} {SCHEDULE_TIMEZONE}")
-    print(f"Next run: {next_run}\\n")
+    print(f"Next run: {next_run}\n")
 
-    print(f"\\nScheduler started")
+    print(f"\nScheduler started")
     scheduler.start()
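
Note: get_next_fire_time(None, now) is how an APScheduler trigger previews its first fire time, which strongly suggests APScheduler here; since the rest of setup_scheduler is not in this diff, the following is a minimal sketch under that assumption (the 'UTC' timezone stands in for SCHEDULE_TIMEZONE, whose value is not shown):

    from datetime import datetime

    from apscheduler.schedulers.blocking import BlockingScheduler
    from apscheduler.triggers.cron import CronTrigger

    # Weekly trigger mirroring the SCHEDULE_* constants from the first hunk.
    trigger = CronTrigger(day_of_week='sun', hour=0, minute=0, timezone='UTC')
    next_run = trigger.get_next_fire_time(None, datetime.now(trigger.timezone))
    print(f"Next run: {next_run}\n")

    scheduler = BlockingScheduler()
    scheduler.add_job(mine_all_agents, trigger)  # mine_all_agents is defined in msr.py
    scheduler.start()  # blocks until interrupted

As a side note, f"\nScheduler started" interpolates nothing, so the f prefix is unnecessary; a plain "\nScheduler started" would read cleaner.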