
Commit c81a91a

ngc92 authored and msaroufim committed
refactoring: compute score in separate utility
1 parent 5d4337b · commit c81a91a


2 files changed (+42, -34 lines)


src/libkernelbot/backend.py

Lines changed: 7 additions & 33 deletions
@@ -1,11 +1,10 @@
 import asyncio
 import copy
-import math
 from datetime import datetime
 from types import SimpleNamespace
 from typing import Optional

-from libkernelbot.consts import GPU, GPU_TO_SM, RankCriterion, SubmissionMode, get_gpu_by_name
+from libkernelbot.consts import GPU, GPU_TO_SM, SubmissionMode, get_gpu_by_name
 from libkernelbot.launchers import Launcher
 from libkernelbot.leaderboard_db import LeaderboardDB
 from libkernelbot.report import (
@@ -15,9 +14,9 @@
     make_short_report,
 )
 from libkernelbot.run_eval import FullResult
-from libkernelbot.submission import ProcessedSubmissionRequest
+from libkernelbot.submission import ProcessedSubmissionRequest, compute_score
 from libkernelbot.task import LeaderboardTask, build_task_config
-from libkernelbot.utils import KernelBotError, setup_logging
+from libkernelbot.utils import setup_logging

 logger = setup_logging(__name__)

@@ -145,41 +144,16 @@ async def submit_leaderboard( # noqa: C901
             and result.runs["leaderboard"].run.success
             and result.runs["leaderboard"].run.passed
         ):
-            score = 0.0
-            num_benchmarks = int(result.runs["leaderboard"].run.result["benchmark-count"])
-            if task.ranking_by == RankCriterion.LAST:
-                if num_benchmarks != 1:
-                    logger.error(
-                        "Ranked submission error for submission %d ranking_by is `last`, "
-                        "but got %d benchmarks",
-                        submission_id,
-                        num_benchmarks,
-                    )
-                    raise KernelBotError(
-                        f"Expected submission to have exactly one benchmark,"
-                        f"got {num_benchmarks}."
-                    )
-                score = float(result.runs["leaderboard"].run.result["benchmark.0.mean"]) / 1e9
-            else:
-                scores = []
-                for i in range(num_benchmarks):
-                    scores.append(
-                        float(result.runs["leaderboard"].run.result[f"benchmark.{i}.mean"])
-                        / 1e9
-                    )
-                if task.ranking_by == RankCriterion.MEAN:
-                    score = sum(scores) / len(scores)
-                elif task.ranking_by == RankCriterion.GEOM:
-                    score = math.pow(math.prod(scores), 1.0 / num_benchmarks)
+            score = compute_score(result, task, submission_id)

         # verifyruns uses a fake submission id of -1
         if submission_id != -1:
             with self.db as db:
                 for key, value in result.runs.items():
                     db.create_submission_run(
-                        submission_id,
-                        value.start,
-                        value.end,
+                        submission=submission_id,
+                        start=value.start,
+                        end=value.end,
                         mode=key,
                         runner=gpu_type.name,
                         score=None if key != "leaderboard" else score,

src/libkernelbot/submission.py

Lines changed: 35 additions & 1 deletion
@@ -1,19 +1,25 @@
 import copy
 import dataclasses
+import math
 import typing
 from datetime import datetime
 from typing import Optional, Union

 from better_profanity import profanity

+from libkernelbot.consts import RankCriterion
 from libkernelbot.leaderboard_db import LeaderboardDB, LeaderboardItem
+from libkernelbot.run_eval import FullResult
 from libkernelbot.task import LeaderboardTask
-from libkernelbot.utils import KernelBotError
+from libkernelbot.utils import KernelBotError, setup_logging

 if typing.TYPE_CHECKING:
     from backend import KernelBackend


+logger = setup_logging(__name__)
+
+
 @dataclasses.dataclass
 class SubmissionRequest:
     # to be filled in when making the request
@@ -147,3 +153,31 @@ def _get_popcorn_directives(submission: str) -> dict:
         elif arg == "leaderboard":
             popcorn_info["leaderboard"] = args[2]
     return popcorn_info
+
+
+def compute_score(result: FullResult, task: LeaderboardTask, submission_id: int) -> float:
+    num_benchmarks = int(result.runs["leaderboard"].run.result["benchmark-count"])
+    if task.ranking_by == RankCriterion.LAST:
+        if num_benchmarks != 1:
+            logger.error(
+                "Ranked submission error for submission %d ranking_by is `last`, "
+                "but got %d benchmarks",
+                submission_id,
+                num_benchmarks,
+            )
+            raise KernelBotError(
+                f"Expected submission to have exactly one benchmark," f"got {num_benchmarks}."
+            )
+        score = float(result.runs["leaderboard"].run.result["benchmark.0.mean"]) / 1e9
+    else:
+        scores = []
+        for i in range(num_benchmarks):
+            scores.append(float(result.runs["leaderboard"].run.result[f"benchmark.{i}.mean"]) / 1e9)
+        if task.ranking_by == RankCriterion.MEAN:
+            score = sum(scores) / len(scores)
+        elif task.ranking_by == RankCriterion.GEOM:
+            score = math.pow(math.prod(scores), 1.0 / num_benchmarks)
+        else:
+            raise KernelBotError(f"Invalid submission mode {task.ranking_by}")
+
+    return score
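
As a quick, non-authoritative illustration of the extracted helper, the sketch below calls compute_score directly with throwaway stand-ins. The SimpleNamespace objects are hypothetical test doubles for FullResult and LeaderboardTask, not library API; the only assumptions taken from the diff are the "benchmark-count" / "benchmark.N.mean" result keys and the /1e9 scaling.

# Minimal sketch: exercising compute_score with hypothetical test doubles.
from types import SimpleNamespace

from libkernelbot.consts import RankCriterion
from libkernelbot.submission import compute_score

fake_result = SimpleNamespace(
    runs={
        "leaderboard": SimpleNamespace(
            run=SimpleNamespace(
                result={
                    "benchmark-count": "2",
                    "benchmark.0.mean": "1.5e6",  # mean runtimes; compute_score divides by 1e9
                    "benchmark.1.mean": "2.5e6",
                }
            )
        )
    }
)
fake_task = SimpleNamespace(ranking_by=RankCriterion.MEAN)

# -1 mirrors the fake submission id used by verifyruns in backend.py
score = compute_score(fake_result, fake_task, submission_id=-1)
print(score)  # ~0.002: the mean of the two benchmark means, divided by 1e9

With RankCriterion.LAST the helper instead insists on exactly one benchmark and raises KernelBotError otherwise, matching the behaviour previously inlined in backend.py.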
