zhimin-z
committed on
Commit
·
fd8a707
1
Parent(s):
6a46881
init
Browse files- .github/workflows/hf_sync.yml +1 -1
- README.md +11 -11
- app.py +23 -23
- msr.py +0 -1
.github/workflows/hf_sync.yml
CHANGED
|
@@ -30,6 +30,6 @@ jobs:
|
|
| 30 |
env:
|
| 31 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 32 |
run: |
|
| 33 |
-
git remote add huggingface https://user:${HF_TOKEN}@huggingface.co/spaces/SWE-Arena/SWE-
|
| 34 |
git fetch huggingface
|
| 35 |
git push huggingface main --force
|
|
|
|
| 30 |
env:
|
| 31 |
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 32 |
run: |
|
| 33 |
+
git remote add huggingface https://user:${HF_TOKEN}@huggingface.co/spaces/SWE-Arena/SWE-Team
|
| 34 |
git fetch huggingface
|
| 35 |
git push huggingface main --force
|
README.md
CHANGED
|
@@ -13,27 +13,27 @@ short_description: Track GitHub team management statistics for SWE assistants
|
|
| 13 |
|
| 14 |
# SWE Agent Team Management Leaderboard
|
| 15 |
|
| 16 |
-
SWE-Team ranks software engineering assistants by their real-world GitHub
|
| 17 |
|
| 18 |
-
No benchmarks. No sandboxes. Just real
|
| 19 |
|
| 20 |
## Why This Exists
|
| 21 |
|
| 22 |
-
Most AI coding agent benchmarks use synthetic tasks and simulated environments. This leaderboard measures real-world activity: how many
|
| 23 |
|
| 24 |
-
If an agent is consistently
|
| 25 |
|
| 26 |
## What We Track
|
| 27 |
|
| 28 |
Key metrics from the last 180 days:
|
| 29 |
|
| 30 |
**Leaderboard Table**
|
| 31 |
-
- **
|
| 32 |
- **Agent Name**: Display name of the agent
|
| 33 |
- **Website**: Link to the agent's homepage or documentation
|
| 34 |
|
| 35 |
**Monthly Trends**
|
| 36 |
-
-
|
| 37 |
- Activity patterns across months
|
| 38 |
|
| 39 |
We focus on 180 days to highlight current capabilities and active assistants.
|
|
@@ -42,8 +42,8 @@ We focus on 180 days to highlight current capabilities and active assistants.
|
|
| 42 |
|
| 43 |
**Data Collection**
|
| 44 |
We mine GitHub activity from [GHArchive](https://www.gharchive.org/), tracking:
|
| 45 |
-
-
|
| 46 |
-
- Monthly
|
| 47 |
|
| 48 |
**Regular Updates**
|
| 49 |
Leaderboard refreshes weekly (Tuesday at 00:00 UTC).
|
|
@@ -56,8 +56,8 @@ Anyone can submit an agent. We store metadata in `SWE-Arena/bot_metadata` and re
|
|
| 56 |
### Browsing
|
| 57 |
Leaderboard tab features:
|
| 58 |
- Searchable table (by agent name or website)
|
| 59 |
-
- Monthly charts (
|
| 60 |
-
- Sortable columns (by
|
| 61 |
|
| 62 |
### Adding Your Agent
|
| 63 |
Submit Agent tab requires:
|
|
@@ -72,7 +72,7 @@ Submissions are validated against GitHub's API and data loads automatically duri
|
|
| 72 |
|
| 73 |
Planned improvements:
|
| 74 |
- Repository-based analysis (which repos are agents managing)
|
| 75 |
-
- Extended metrics (member roles, access levels)
|
| 76 |
- Organization and team breakdown
|
| 77 |
- Member management patterns (invitations, removals, role changes)
|
| 78 |
|
|
|
|
| 13 |
|
| 14 |
# SWE Agent Team Management Leaderboard
|
| 15 |
|
| 16 |
+
SWE-Team ranks software engineering assistants by their real-world GitHub membership event activity.
|
| 17 |
|
| 18 |
+
No benchmarks. No sandboxes. Just real membership events tracked from public repositories.
|
| 19 |
|
| 20 |
## Why This Exists
|
| 21 |
|
| 22 |
+
Most AI coding agent benchmarks use synthetic tasks and simulated environments. This leaderboard measures real-world activity: how many membership events is the agent generating? How active is it across different projects? Is the agent's usage growing?
|
| 23 |
|
| 24 |
+
If an agent is consistently generating membership events across different projects, that tells you something no benchmark can.
|
| 25 |
|
| 26 |
## What We Track
|
| 27 |
|
| 28 |
Key metrics from the last 180 days:
|
| 29 |
|
| 30 |
**Leaderboard Table**
|
| 31 |
+
- **Total Membership Events**: Number of team membership changes (e.g., adding or removing members) performed by the agent
|
| 32 |
- **Agent Name**: Display name of the agent
|
| 33 |
- **Website**: Link to the agent's homepage or documentation
|
| 34 |
|
| 35 |
**Monthly Trends**
|
| 36 |
+
- Membership event volume over time (bar charts)
|
| 37 |
- Activity patterns across months
|
| 38 |
|
| 39 |
We focus on 180 days to highlight current capabilities and active assistants.
|
|
|
|
| 42 |
|
| 43 |
**Data Collection**
|
| 44 |
We mine GitHub activity from [GHArchive](https://www.gharchive.org/), tracking:
|
| 45 |
+
- Membership events by the agent (`MemberEvent` data)
|
| 46 |
+
- Monthly membership event volumes and trends
|
| 47 |
|
| 48 |
**Regular Updates**
|
| 49 |
Leaderboard refreshes weekly (Tuesday at 00:00 UTC).
|
|
|
|
| 56 |
### Browsing
|
| 57 |
Leaderboard tab features:
|
| 58 |
- Searchable table (by agent name or website)
|
| 59 |
+
- Monthly charts (membership event volumes and activity trends)
|
| 60 |
+
- Sortable columns (by total membership events)
|
| 61 |
|
| 62 |
### Adding Your Agent
|
| 63 |
Submit Agent tab requires:
|
|
|
|
| 72 |
|
| 73 |
Planned improvements:
|
| 74 |
- Repository-based analysis (which repos are agents managing)
|
| 75 |
+
- Extended metrics (membership event types, member roles, access levels)
|
| 76 |
- Organization and team breakdown
|
| 77 |
- Member management patterns (invitations, removals, role changes)
|
| 78 |
|
app.py
CHANGED
|
@@ -29,7 +29,7 @@ MAX_RETRIES = 5
|
|
| 29 |
LEADERBOARD_COLUMNS = [
|
| 30 |
("Agent Name", "string"),
|
| 31 |
("Website", "string"),
|
| 32 |
-
("
|
| 33 |
]
|
| 34 |
|
| 35 |
# =============================================================================
|
|
@@ -268,7 +268,7 @@ def load_leaderboard_data_from_hf():
|
|
| 268 |
|
| 269 |
def create_monthly_metrics_plot(top_n=5):
|
| 270 |
"""
|
| 271 |
-
Create a Plotly figure showing monthly
|
| 272 |
|
| 273 |
Args:
|
| 274 |
top_n: Number of top agents to show (default: 5)
|
|
@@ -297,14 +297,14 @@ def create_monthly_metrics_plot(top_n=5):
|
|
| 297 |
|
| 298 |
# Apply top_n filter if specified
|
| 299 |
if top_n is not None and top_n > 0 and metrics.get('agents'):
|
| 300 |
-
# Calculate
|
| 301 |
agent_totals = []
|
| 302 |
for agent_name in metrics['agents']:
|
| 303 |
agent_data = metrics['data'].get(agent_name, {})
|
| 304 |
-
|
| 305 |
-
agent_totals.append((agent_name,
|
| 306 |
|
| 307 |
-
# Sort by
|
| 308 |
agent_totals.sort(key=lambda x: x[1], reverse=True)
|
| 309 |
top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
|
| 310 |
|
|
@@ -372,7 +372,7 @@ def create_monthly_metrics_plot(top_n=5):
|
|
| 372 |
marker=dict(color=color, opacity=0.7),
|
| 373 |
hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
|
| 374 |
'Month: %{x}<br>' +
|
| 375 |
-
'
|
| 376 |
'<extra></extra>',
|
| 377 |
offsetgroup=agent_name # Group bars by agent for proper spacing
|
| 378 |
)
|
|
@@ -380,7 +380,7 @@ def create_monthly_metrics_plot(top_n=5):
|
|
| 380 |
|
| 381 |
# Update axes labels
|
| 382 |
fig.update_xaxes(title_text=None)
|
| 383 |
-
fig.update_yaxes(title_text="<b>
|
| 384 |
|
| 385 |
# Update layout
|
| 386 |
show_legend = (top_n is not None and top_n <= 10)
|
|
@@ -399,7 +399,7 @@ def create_monthly_metrics_plot(top_n=5):
|
|
| 399 |
def get_leaderboard_dataframe():
|
| 400 |
"""
|
| 401 |
Load leaderboard from saved dataset and convert to pandas DataFrame for display.
|
| 402 |
-
Returns formatted DataFrame sorted by
|
| 403 |
"""
|
| 404 |
# Load from saved dataset
|
| 405 |
saved_data = load_leaderboard_data_from_hf()
|
|
@@ -424,11 +424,11 @@ def get_leaderboard_dataframe():
|
|
| 424 |
rows = []
|
| 425 |
filtered_count = 0
|
| 426 |
for identifier, data in cache_dict.items():
|
| 427 |
-
|
| 428 |
-
print(f" Agent '{identifier}': {
|
| 429 |
|
| 430 |
-
# Filter out agents with zero
|
| 431 |
-
if
|
| 432 |
filtered_count += 1
|
| 433 |
continue
|
| 434 |
|
|
@@ -436,10 +436,10 @@ def get_leaderboard_dataframe():
|
|
| 436 |
rows.append([
|
| 437 |
data.get('name', 'Unknown'),
|
| 438 |
data.get('website', 'N/A'),
|
| 439 |
-
|
| 440 |
])
|
| 441 |
|
| 442 |
-
print(f"Filtered out {filtered_count} agents with 0
|
| 443 |
print(f"Leaderboard will show {len(rows)} agents")
|
| 444 |
|
| 445 |
# Create DataFrame
|
|
@@ -447,14 +447,14 @@ def get_leaderboard_dataframe():
|
|
| 447 |
df = pd.DataFrame(rows, columns=column_names)
|
| 448 |
|
| 449 |
# Ensure numeric types
|
| 450 |
-
numeric_cols = ["
|
| 451 |
for col in numeric_cols:
|
| 452 |
if col in df.columns:
|
| 453 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
| 454 |
|
| 455 |
-
# Sort by
|
| 456 |
-
if "
|
| 457 |
-
df = df.sort_values(by="
|
| 458 |
|
| 459 |
print(f"Final DataFrame shape: {df.shape}")
|
| 460 |
print("="*60 + "\n")
|
|
@@ -509,7 +509,7 @@ def submit_agent(identifier, agent_name, organization, website):
|
|
| 509 |
return "ERROR: Failed to save submission", gr.update()
|
| 510 |
|
| 511 |
# Return success message - data will be populated by backend updates
|
| 512 |
-
return f"SUCCESS: Successfully submitted {agent_name}!
|
| 513 |
|
| 514 |
|
| 515 |
# =============================================================================
|
|
@@ -566,13 +566,13 @@ print(f"{'='*80}\n")
|
|
| 566 |
# Create Gradio interface
|
| 567 |
with gr.Blocks(title="SWE Agent Member Leaderboard", theme=gr.themes.Soft()) as app:
|
| 568 |
gr.Markdown("# SWE Agent Member Leaderboard")
|
| 569 |
-
gr.Markdown(f"Track and compare
|
| 570 |
|
| 571 |
with gr.Tabs():
|
| 572 |
|
| 573 |
# Leaderboard Tab
|
| 574 |
with gr.Tab("Leaderboard"):
|
| 575 |
-
gr.Markdown("*Statistics are based on
|
| 576 |
leaderboard_table = Leaderboard(
|
| 577 |
value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
|
| 578 |
datatype=LEADERBOARD_COLUMNS,
|
|
@@ -590,7 +590,7 @@ with gr.Blocks(title="SWE Agent Member Leaderboard", theme=gr.themes.Soft()) as
|
|
| 590 |
# Monthly Metrics Section
|
| 591 |
gr.Markdown("---") # Divider
|
| 592 |
gr.Markdown("### Monthly Performance - Top 5 Agents")
|
| 593 |
-
gr.Markdown("*Shows
|
| 594 |
|
| 595 |
monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
|
| 596 |
|
|
|
|
| 29 |
LEADERBOARD_COLUMNS = [
|
| 30 |
("Agent Name", "string"),
|
| 31 |
("Website", "string"),
|
| 32 |
+
("Total Membership Events", "number"),
|
| 33 |
]
|
| 34 |
|
| 35 |
# =============================================================================
|
|
|
|
| 268 |
|
| 269 |
def create_monthly_metrics_plot(top_n=5):
|
| 270 |
"""
|
| 271 |
+
Create a Plotly figure showing monthly total membership events as bar charts.
|
| 272 |
|
| 273 |
Args:
|
| 274 |
top_n: Number of top agents to show (default: 5)
|
|
|
|
| 297 |
|
| 298 |
# Apply top_n filter if specified
|
| 299 |
if top_n is not None and top_n > 0 and metrics.get('agents'):
|
| 300 |
+
# Calculate total membership events for each agent
|
| 301 |
agent_totals = []
|
| 302 |
for agent_name in metrics['agents']:
|
| 303 |
agent_data = metrics['data'].get(agent_name, {})
|
| 304 |
+
total_membership_events = sum(agent_data.get('total_members', []))
|
| 305 |
+
agent_totals.append((agent_name, total_membership_events))
|
| 306 |
|
| 307 |
+
# Sort by total membership events and take top N
|
| 308 |
agent_totals.sort(key=lambda x: x[1], reverse=True)
|
| 309 |
top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
|
| 310 |
|
|
|
|
| 372 |
marker=dict(color=color, opacity=0.7),
|
| 373 |
hovertemplate='<b>Agent: %{fullData.name}</b><br>' +
|
| 374 |
'Month: %{x}<br>' +
|
| 375 |
+
'Total Membership Events: %{y}<br>' +
|
| 376 |
'<extra></extra>',
|
| 377 |
offsetgroup=agent_name # Group bars by agent for proper spacing
|
| 378 |
)
|
|
|
|
| 380 |
|
| 381 |
# Update axes labels
|
| 382 |
fig.update_xaxes(title_text=None)
|
| 383 |
+
fig.update_yaxes(title_text="<b>Total Membership Events</b>")
|
| 384 |
|
| 385 |
# Update layout
|
| 386 |
show_legend = (top_n is not None and top_n <= 10)
|
|
|
|
| 399 |
def get_leaderboard_dataframe():
|
| 400 |
"""
|
| 401 |
Load leaderboard from saved dataset and convert to pandas DataFrame for display.
|
| 402 |
+
Returns formatted DataFrame sorted by total membership events.
|
| 403 |
"""
|
| 404 |
# Load from saved dataset
|
| 405 |
saved_data = load_leaderboard_data_from_hf()
|
|
|
|
| 424 |
rows = []
|
| 425 |
filtered_count = 0
|
| 426 |
for identifier, data in cache_dict.items():
|
| 427 |
+
total_membership_events = data.get('total_members', 0)
|
| 428 |
+
print(f" Agent '{identifier}': {total_membership_events} total membership events")
|
| 429 |
|
| 430 |
+
# Filter out agents with zero membership events
|
| 431 |
+
if total_membership_events == 0:
|
| 432 |
filtered_count += 1
|
| 433 |
continue
|
| 434 |
|
|
|
|
| 436 |
rows.append([
|
| 437 |
data.get('name', 'Unknown'),
|
| 438 |
data.get('website', 'N/A'),
|
| 439 |
+
total_membership_events,
|
| 440 |
])
|
| 441 |
|
| 442 |
+
print(f"Filtered out {filtered_count} agents with 0 total membership events")
|
| 443 |
print(f"Leaderboard will show {len(rows)} agents")
|
| 444 |
|
| 445 |
# Create DataFrame
|
|
|
|
| 447 |
df = pd.DataFrame(rows, columns=column_names)
|
| 448 |
|
| 449 |
# Ensure numeric types
|
| 450 |
+
numeric_cols = ["Total Membership Events"]
|
| 451 |
for col in numeric_cols:
|
| 452 |
if col in df.columns:
|
| 453 |
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
|
| 454 |
|
| 455 |
+
# Sort by Total Membership Events descending
|
| 456 |
+
if "Total Membership Events" in df.columns and not df.empty:
|
| 457 |
+
df = df.sort_values(by="Total Membership Events", ascending=False).reset_index(drop=True)
|
| 458 |
|
| 459 |
print(f"Final DataFrame shape: {df.shape}")
|
| 460 |
print("="*60 + "\n")
|
|
|
|
| 509 |
return "ERROR: Failed to save submission", gr.update()
|
| 510 |
|
| 511 |
# Return success message - data will be populated by backend updates
|
| 512 |
+
return f"SUCCESS: Successfully submitted {agent_name}! Total membership events data will be automatically populated by the backend system via the maintainers.", gr.update()
|
| 513 |
|
| 514 |
|
| 515 |
# =============================================================================
|
|
|
|
| 566 |
# Create Gradio interface
|
| 567 |
with gr.Blocks(title="SWE Agent Member Leaderboard", theme=gr.themes.Soft()) as app:
|
| 568 |
gr.Markdown("# SWE Agent Member Leaderboard")
|
| 569 |
+
gr.Markdown(f"Track and compare total membership events by SWE agents")
|
| 570 |
|
| 571 |
with gr.Tabs():
|
| 572 |
|
| 573 |
# Leaderboard Tab
|
| 574 |
with gr.Tab("Leaderboard"):
|
| 575 |
+
gr.Markdown("*Statistics are based on total membership events by agents*")
|
| 576 |
leaderboard_table = Leaderboard(
|
| 577 |
value=pd.DataFrame(columns=[col[0] for col in LEADERBOARD_COLUMNS]), # Empty initially
|
| 578 |
datatype=LEADERBOARD_COLUMNS,
|
|
|
|
| 590 |
# Monthly Metrics Section
|
| 591 |
gr.Markdown("---") # Divider
|
| 592 |
gr.Markdown("### Monthly Performance - Top 5 Agents")
|
| 593 |
+
gr.Markdown("*Shows total membership events for the most active agents*")
|
| 594 |
|
| 595 |
monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
|
| 596 |
|
msr.py
CHANGED
|
@@ -415,7 +415,6 @@ def fetch_all_member_metadata_streaming(conn, identifiers, start_date, end_date)
|
|
| 415 |
maximum_object_size=2147483648
|
| 416 |
)
|
| 417 |
WHERE type = 'MemberEvent'
|
| 418 |
-
AND TRY_CAST(json_extract_string(to_json(payload), '$.action') AS VARCHAR) = 'added'
|
| 419 |
AND TRY_CAST(json_extract_string(to_json(payload), '$.member.login') AS VARCHAR) IS NOT NULL
|
| 420 |
AND TRY_CAST(json_extract_string(to_json(actor), '$.login') AS VARCHAR) IN ({identifier_list})
|
| 421 |
"""
|
|
|
|
| 415 |
maximum_object_size=2147483648
|
| 416 |
)
|
| 417 |
WHERE type = 'MemberEvent'
|
|
|
|
| 418 |
AND TRY_CAST(json_extract_string(to_json(payload), '$.member.login') AS VARCHAR) IS NOT NULL
|
| 419 |
AND TRY_CAST(json_extract_string(to_json(actor), '$.login') AS VARCHAR) IN ({identifier_list})
|
| 420 |
"""
|