|
1 | 1 | import asyncio |
2 | 2 | import copy |
3 | | -import math |
4 | 3 | from datetime import datetime |
5 | 4 | from types import SimpleNamespace |
6 | 5 | from typing import Optional |
7 | 6 |
|
8 | | -from libkernelbot.consts import GPU, GPU_TO_SM, RankCriterion, SubmissionMode, get_gpu_by_name |
| 7 | +from libkernelbot.consts import GPU, GPU_TO_SM, SubmissionMode, get_gpu_by_name |
9 | 8 | from libkernelbot.launchers import Launcher |
10 | 9 | from libkernelbot.leaderboard_db import LeaderboardDB |
11 | 10 | from libkernelbot.report import ( |
|
15 | 14 | make_short_report, |
16 | 15 | ) |
17 | 16 | from libkernelbot.run_eval import FullResult |
18 | | -from libkernelbot.submission import ProcessedSubmissionRequest |
| 17 | +from libkernelbot.submission import ProcessedSubmissionRequest, compute_score |
19 | 18 | from libkernelbot.task import LeaderboardTask, build_task_config |
20 | | -from libkernelbot.utils import KernelBotError, setup_logging |
| 19 | +from libkernelbot.utils import setup_logging |
21 | 20 |
|
22 | 21 | logger = setup_logging(__name__) |
23 | 22 |
|
@@ -145,41 +144,16 @@ async def submit_leaderboard( # noqa: C901 |
145 | 144 | and result.runs["leaderboard"].run.success |
146 | 145 | and result.runs["leaderboard"].run.passed |
147 | 146 | ): |
148 | | - score = 0.0 |
149 | | - num_benchmarks = int(result.runs["leaderboard"].run.result["benchmark-count"]) |
150 | | - if task.ranking_by == RankCriterion.LAST: |
151 | | - if num_benchmarks != 1: |
152 | | - logger.error( |
153 | | - "Ranked submission error for submission %d ranking_by is `last`, " |
154 | | - "but got %d benchmarks", |
155 | | - submission_id, |
156 | | - num_benchmarks, |
157 | | - ) |
158 | | - raise KernelBotError( |
159 | | - f"Expected submission to have exactly one benchmark," |
160 | | - f"got {num_benchmarks}." |
161 | | - ) |
162 | | - score = float(result.runs["leaderboard"].run.result["benchmark.0.mean"]) / 1e9 |
163 | | - else: |
164 | | - scores = [] |
165 | | - for i in range(num_benchmarks): |
166 | | - scores.append( |
167 | | - float(result.runs["leaderboard"].run.result[f"benchmark.{i}.mean"]) |
168 | | - / 1e9 |
169 | | - ) |
170 | | - if task.ranking_by == RankCriterion.MEAN: |
171 | | - score = sum(scores) / len(scores) |
172 | | - elif task.ranking_by == RankCriterion.GEOM: |
173 | | - score = math.pow(math.prod(scores), 1.0 / num_benchmarks) |
| 147 | + score = compute_score(result, task, submission_id) |
174 | 148 |
|
175 | 149 | # verifyruns uses a fake submission id of -1 |
176 | 150 | if submission_id != -1: |
177 | 151 | with self.db as db: |
178 | 152 | for key, value in result.runs.items(): |
179 | 153 | db.create_submission_run( |
180 | | - submission_id, |
181 | | - value.start, |
182 | | - value.end, |
| 154 | + submission=submission_id, |
| 155 | + start=value.start, |
| 156 | + end=value.end, |
183 | 157 | mode=key, |
184 | 158 | runner=gpu_type.name, |
185 | 159 | score=None if key != "leaderboard" else score, |
|
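
For context on the refactor above: the inline ranking logic (removed lines 148-173) is replaced by a single call to `compute_score(result, task, submission_id)`, now imported from `libkernelbot.submission`, and the `math`, `RankCriterion`, and `KernelBotError` imports move with it. The sketch below is an assumption about what that extracted helper looks like, reconstructed from the deleted inline code; the exact error message, logging behavior, and type annotations are guesses, not the actual implementation in `libkernelbot/submission.py`.

```python
import math

from libkernelbot.consts import RankCriterion
from libkernelbot.utils import KernelBotError, setup_logging

logger = setup_logging(__name__)


def compute_score(result, task, submission_id) -> float:
    """Derive the leaderboard score (seconds) from the benchmark results.

    Mirrors the inline logic removed from submit_leaderboard: `last` expects
    exactly one benchmark, `mean`/`geom` aggregate over all of them.
    """
    run_result = result.runs["leaderboard"].run.result
    num_benchmarks = int(run_result["benchmark-count"])

    if task.ranking_by == RankCriterion.LAST:
        if num_benchmarks != 1:
            # submission_id is presumably passed in so the error can be logged
            # against the offending submission, as the old inline code did.
            logger.error(
                "Ranked submission error for submission %d: ranking_by is `last`, "
                "but got %d benchmarks",
                submission_id,
                num_benchmarks,
            )
            raise KernelBotError(
                f"Expected submission to have exactly one benchmark, "
                f"got {num_benchmarks}."
            )
        return float(run_result["benchmark.0.mean"]) / 1e9

    # Convert each benchmark mean from nanoseconds to seconds.
    scores = [
        float(run_result[f"benchmark.{i}.mean"]) / 1e9 for i in range(num_benchmarks)
    ]
    if task.ranking_by == RankCriterion.MEAN:
        return sum(scores) / len(scores)
    if task.ranking_by == RankCriterion.GEOM:
        return math.pow(math.prod(scores), 1.0 / num_benchmarks)
    raise KernelBotError(f"Unknown ranking criterion: {task.ranking_by}")
```

Pulling this into `libkernelbot.submission` keeps `submit_leaderboard` focused on orchestration and database writes, while the scoring rules live next to the other submission-processing code.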