| """ | |
| Column definitions for DeathMath Leaderboard table. | |
| """ | |
| import logging | |
| from dataclasses import dataclass, make_dataclass | |
| from enum import Enum | |
| logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") | |
```

def fields(raw_class):
    """Extract non-dunder fields from a dataclass."""
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
```
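
# Illustrative example (hypothetical Demo class, not part of this module): for
#
#     @dataclass
#     class Demo:
#         a: int = 1
#         b: str = "x"
#
# fields(Demo) returns [1, "x"]: the class-level default values, with dunder
# entries such as __doc__ and __dataclass_fields__ filtered out. The generated
# AutoEvalColumn class at the bottom of this module is introspected the same way.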


@dataclass
class Task:
    """Represents a benchmark task with its metrics."""
    benchmark: str
    metric: str
    col_name: str


class Tasks(Enum):
    """Available benchmark tasks for the DeathMath leaderboard."""
    math = Task("RussianMath", "score", "math_score")
    physics = Task("RussianPhysics", "score", "physics_score")
    combined = Task("Combined", "score", "score")
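
# Illustrative access pattern: each enum member wraps a Task, so the leaderboard
# column backing the physics benchmark is reachable as
#
#     Tasks.physics.value.col_name  # -> "physics_score"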


# frozen=True keeps ColumnContent instances hashable, so they can be used as
# dataclass field defaults below (Python 3.11+ rejects unhashable defaults).
@dataclass(frozen=True)
class ColumnContent:
    """Configuration for a leaderboard table column."""
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
    dummy: bool = False


# Each entry is a (field_name, type, default) triple consumed by make_dataclass.
auto_eval_column_dict = []
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("model", "markdown", True, never_hidden=True)])
auto_eval_column_dict.append(["score", ColumnContent, ColumnContent("score", "number", True)])
for task in Tasks:
    # The combined task is already surfaced by the top-level "score" column above.
    if task != Tasks.combined:
        auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
auto_eval_column_dict.append(["total_tokens", ColumnContent, ColumnContent("total_tokens", "number", False)])
auto_eval_column_dict.append(["evaluation_time", ColumnContent, ColumnContent("evaluation_time", "number", False)])
auto_eval_column_dict.append(["system_prompt", ColumnContent, ColumnContent("system_prompt", "str", False)])

AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
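
# Minimal usage sketch (illustrative only, assuming a caller that just needs the
# default-visible column names): the generated AutoEvalColumn class is
# introspected with the fields() helper defined above.
if __name__ == "__main__":
    visible = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default]
    logging.info("Columns shown by default: %s", visible)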