Spaces:

BenjaminB
/

peft-repo-metrics

Running

Benjamin Bossan

Update title

c3d91d8 about 2 months ago

4.14 kB

	from pathlib import Path

	import gradio as gr
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go


	# Metrics file read by load_data(); a relative path, so it is resolved against
	# the process's current working directory.
	CSV_PATH = Path("metrics.csv")


	def load_data(path: "str | Path | None" = None) -> pd.DataFrame:
	    """Load the metrics CSV into a date-sorted, numeric DataFrame.

	    Args:
	        path: CSV to read (anything ``pandas.read_csv`` accepts). When omitted,
	            the module-level ``CSV_PATH`` is used, so existing zero-argument
	            calls behave exactly as before.

	    Returns:
	        The frame sorted ascending by ``date`` with a fresh ``0..n-1`` index.
	        ``date`` is parsed with ``pd.to_datetime`` and every other column with
	        ``pd.to_numeric``; values that cannot be converted become ``NaT`` /
	        ``NaN`` instead of raising.

	    Raises:
	        ValueError: If the file has no ``date`` column.
	    """
	    source = CSV_PATH if path is None else path
	    df = pd.read_csv(source)
	    if "date" not in df.columns:
	        raise ValueError(f"Expected a 'date' column in {source}")

	    df["date"] = pd.to_datetime(df["date"], errors="coerce")
	    # Rows whose date failed to parse (NaT) are kept; sort_values puts them last.
	    df = df.sort_values("date").reset_index(drop=True)

	    # Everything except the date is treated as a metric and forced to numeric.
	    for column in df.columns:
	        if column != "date":
	            df[column] = pd.to_numeric(df[column], errors="coerce")
	    return df


	def line_figure(df: pd.DataFrame, cols: list[str], title: str, yaxis_title: str = "") -> go.Figure:
	    """Build a line chart of the chosen columns plotted against ``date``.

	    Args:
	        df: Frame containing a ``date`` column plus the columns named in ``cols``.
	        cols: Column names to draw, one line per column. May be empty.
	        title: Figure title.
	        yaxis_title: Label for the y axis.

	    Returns:
	        A Plotly figure. When ``cols`` is empty an otherwise blank figure is
	        returned whose title is suffixed with "(no series selected)", so the
	        caller always has something to render.
	    """
	    if cols:
	        chart = px.line(data_frame=df, x="date", y=cols, markers=True, title=title)
	        # Time-series friendly layout: one shared hover box per x position.
	        layout = dict(
	            legend_title_text="Series",
	            xaxis_title="Date",
	            yaxis_title=yaxis_title,
	            hovermode="x unified",
	            margin=dict(l=50, r=20, t=50, b=40),
	        )
	        chart.update_layout(**layout)
	    else:
	        # Nothing selected: hand back an empty placeholder instead of erroring.
	        chart = go.Figure()
	        chart.update_layout(title=f"{title} (no series selected)")
	    return chart


	# One row per UI tab: (tab title, y-axis label, metric column names).
	# TAB_SPEC and Y_LABELS are both derived from this table so that their keys
	# always match.
	_TAB_TABLE = (
	    (
	        "Docstrings",
	        "value",
	        ("docstring coverage", "docstring missing"),
	    ),
	    (
	        "Size (Lines/Statements/Expressions/Parameters)",
	        "count",
	        (
	            "lines mean", "lines max", "lines 90th-percentile",
	            "statements mean", "statements max", "statements 90th-percentile",
	            "expressions mean", "expressions max", "expressions 90th-percentile",
	            "parameters mean", "parameters max", "parameters 90th-percentile",
	        ),
	    ),
	    (
	        "Complexity",
	        "complexity",
	        (
	            "cyclomatic_complexity mean",
	            "cyclomatic_complexity max",
	            "cyclomatic_complexity 90th-percentile",
	        ),
	    ),
	    (
	        "Typing",
	        "fraction / coverage",
	        ("type_coverage mean", "type_coverage min", "type_coverage 50th-percentile"),
	    ),
	    (
	        "Duplication",
	        "score / lines",
	        (
	            "duplication.score mean",
	            "duplication.score max",
	            "duplication.score 90th-percentile",
	            "duplication.score 50th-percentile",
	            "duplication.duplicated-lines total",
	        ),
	    ),
	    (
	        "TODOs",
	        "count",
	        ("todo_comments total",),
	    ),
	    (
	        "CLOC (Repository scope)",
	        "lines / files",
	        ("files", "lines blank", "lines comment", "lines code"),
	    ),
	)

	# Tab title -> metric column names offered in that tab, in display order.
	TAB_SPEC = {tab: list(columns) for tab, _, columns in _TAB_TABLE}

	# Tab title -> y-axis title passed to line_figure for that tab.
	Y_LABELS = {tab: y_label for tab, y_label, _ in _TAB_TABLE}

	# Loaded once at import time; every tab and every callback reads this frame.
	DF = load_data()


	def _date_span(df: pd.DataFrame) -> str:
	    """Return "first → last" for the parsed dates, or a placeholder if none are valid.

	    Calling ``.date()`` on ``NaT`` raises, which is what ``min()``/``max()``
	    return for an empty or all-unparseable ``date`` column; guarding here
	    keeps the app from crashing at start-up on such a file.
	    """
	    valid_dates = df["date"].dropna()
	    if valid_dates.empty:
	        return "no valid dates"
	    return f"{valid_dates.min().date()} → {valid_dates.max().date()}"


	with gr.Blocks(title="Code Metrics – Time Series", fill_height=True) as demo:
	    gr.Markdown(
	        "## PEFT Code Metrics Over Time\n"
	        f"Loaded {CSV_PATH} with {len(DF)} rows spanning {_date_span(DF)}.\n\n"
	        "Use each tab to pick the series you want to plot."
	    )

	    with gr.Tabs():
	        # Handles to the per-tab widgets; not read anywhere in the visible code.
	        tab_controls = []
	        tab_plots = []

	        for tab_name, series in TAB_SPEC.items():
	            # Only offer series that actually exist in the CSV.
	            available = [s for s in series if s in DF.columns]
	            with gr.Tab(tab_name):
	                # NOTE(review): the source's indentation was lost; the plot is
	                # assumed to sit below the row rather than inside it - confirm.
	                with gr.Row():
	                    sel = gr.CheckboxGroup(
	                        choices=available,
	                        value=available,
	                        label="Series",
	                    )
	                plot = gr.Plot(
	                    value=line_figure(DF, available, tab_name, Y_LABELS.get(tab_name, "")),
	                    show_label=False,
	                )

	                # `t=tab_name` freezes the current tab name; a plain closure would
	                # see the loop variable's final value in every callback.
	                sel.change(
	                    fn=lambda cols, t=tab_name: line_figure(DF, cols, t, Y_LABELS.get(t, "")),
	                    inputs=sel,
	                    outputs=plot,
	                )

	                tab_controls.append(sel)
	                tab_plots.append(plot)


	# Start the server only when run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()