Skip to content

Commit f8f90d4

Browse files
authored
Rename benchmark result columns (#4271)
* New results column names
1 parent 2cd8489 commit f8f90d4

File tree

9 files changed

+129
-130
lines changed

9 files changed

+129
-130
lines changed

tests/perf_v2/benchmark.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def train(
153153

154154
self._rename_raw_data(
155155
work_dir=Path(engine.work_dir),
156-
replaces={"train_": "train/", "{pre}": "train/"},
156+
replaces={"train_": "train/", "{pre}": "training:"},
157157
)
158158
del engine
159159
return total_time
@@ -196,9 +196,9 @@ def test(
196196
)
197197

198198
replace_map = {
199-
RunTestType.TORCH: {"test_": "test/", "{pre}": "test/"},
200-
RunTestType.EXPORT: {"test": test_type, "{pre}": f"{test_type}/"},
201-
RunTestType.OPTIMIZE: {"test": test_type, "{pre}": f"{test_type}/"},
199+
RunTestType.TORCH: {"test_": "test/", "{pre}": f"{test_type}:"},
200+
RunTestType.EXPORT: {"test_": "test/", "{pre}": f"{test_type}:"},
201+
RunTestType.OPTIMIZE: {"test_": "test/", "{pre}": f"{test_type}:"},
202202
}
203203

204204
extra_kwargs = {}
@@ -219,8 +219,8 @@ def test(
219219
# It is calculated by dividing the total time by the number of samples.
220220
latency = total_time / len(engine.datamodule.subsets["test"])
221221
extra_metrics = {
222-
f"test({test_type})/e2e_time": total_time,
223-
f"test({test_type})/latency": latency,
222+
f"{test_type}:test/e2e_time": total_time,
223+
f"{test_type}:test/latency": latency,
224224
}
225225
# =================
226226

@@ -296,8 +296,8 @@ def optimize(
296296
total_time = time() - start_time
297297

298298
# OTX does not create metrics.csv during optimization,
299-
# So we are manually write optimize/e2e_time to csv.
300-
data_frame = pd.DataFrame({"optimize/e2e_time": [total_time]})
299+
# so we manually write optimize:e2e_time to the CSV.
300+
data_frame = pd.DataFrame({"optimize:e2e_time": [total_time]})
301301
data_frame.to_csv(sub_work_dir / f"{SubCommand.OPTIMIZE.value}/metrics.csv", index=False)
302302
# =================
303303

@@ -403,7 +403,7 @@ def run(
403403
tags=tags,
404404
criteria=criteria,
405405
extra_metrics={
406-
"train/e2e_time": e2e_train_time,
406+
"training:e2e_time": e2e_train_time,
407407
},
408408
)
409409

tests/perf_v2/summary.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -175,17 +175,17 @@ def summarize_table(history: pd.DataFrame, task: OTXTaskType) -> list[pd.DataFra
175175

176176
# Metrics to summarize in aggregated table
177177
metrics = [
178-
"train/e2e_time",
179-
"train/epoch",
180-
"train/iter_time",
181-
"train/gpu_mem",
182-
f"export/{score_metric}",
183-
f"optimize/{score_metric}",
184-
f"test/{score_metric}",
185-
"test(torch)/latency",
186-
"test(export)/latency",
187-
"test(optimize)/latency",
188-
"optimize/e2e_time",
178+
"training:e2e_time",
179+
"training:epoch",
180+
"training:train/iter_time",
181+
"training:gpu_mem",
182+
f"torch:test/{score_metric}",
183+
f"export:test/{score_metric}",
184+
f"optimize:test/{score_metric}",
185+
"torch:test/latency",
186+
"export:test/latency",
187+
"optimize:test/latency",
188+
"optimize:test/e2e_time",
189189
]
190190

191191
raw_task_data = history.query(f"task == '{task.value}'")

tests/perf_v2/tasks/anomaly.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,20 @@
3838
]
3939

4040
BENCHMARK_CRITERIA = [
41-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
42-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
43-
Criterion(name="train/gpu_mem", summary="max", compare="<", margin=0.1),
44-
Criterion(name="test/image_F1Score", summary="max", compare=">", margin=0.1),
45-
Criterion(name="export/image_F1Score", summary="max", compare=">", margin=0.1),
46-
Criterion(name="optimize/image_F1Score", summary="max", compare=">", margin=0.1),
47-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
48-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
49-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
50-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
51-
Criterion(name="optimize/e2e_time", summary="mean", compare="<", margin=0.1),
52-
Criterion(name="test(torch)/latency", summary="mean", compare="<", margin=0.1),
53-
Criterion(name="test(export)/latency", summary="mean", compare="<", margin=0.1),
54-
Criterion(name="test(optimize)/latency", summary="mean", compare="<", margin=0.1),
55-
Criterion(name="test(torch)/e2e_time", summary="max", compare=">", margin=0.1),
56-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
57-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
41+
Criterion(name="training:epoch", summary="max", compare="<", margin=0.1),
42+
Criterion(name="training:e2e_time", summary="max", compare="<", margin=0.1),
43+
Criterion(name="training:gpu_mem", summary="max", compare="<", margin=0.1),
44+
Criterion(name="training:train/iter_time", summary="mean", compare="<", margin=0.1),
45+
Criterion(name="torch:test/image_F1Score", summary="max", compare=">", margin=0.1),
46+
Criterion(name="export:test/image_F1Score", summary="max", compare=">", margin=0.1),
47+
Criterion(name="optimize:test/image_F1Score", summary="max", compare=">", margin=0.1),
48+
Criterion(name="torch:test/iter_time", summary="mean", compare="<", margin=0.1),
49+
Criterion(name="export:test/iter_time", summary="mean", compare="<", margin=0.1),
50+
Criterion(name="optimize:test/iter_time", summary="mean", compare="<", margin=0.1),
51+
Criterion(name="torch:test/latency", summary="mean", compare="<", margin=0.1),
52+
Criterion(name="export:test/latency", summary="mean", compare="<", margin=0.1),
53+
Criterion(name="optimize:test/latency", summary="mean", compare="<", margin=0.1),
54+
Criterion(name="torch:test/e2e_time", summary="max", compare=">", margin=0.1),
55+
Criterion(name="export:test/e2e_time", summary="max", compare=">", margin=0.1),
56+
Criterion(name="optimize:test/e2e_time", summary="max", compare=">", margin=0.1),
5857
]

tests/perf_v2/tasks/classification.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,24 @@
1616
from otx.core.types.task import OTXTaskType
1717

1818
CLASSIFICATION_BENCHMARK_CRITERIA = [
19-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
20-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
21-
Criterion(name="train/gpu_mem", summary="max", compare="<", margin=0.1),
22-
Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
23-
Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
24-
Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
25-
Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
26-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
27-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
28-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
29-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
30-
Criterion(name="optimize/e2e_time", summary="mean", compare="<", margin=0.1),
31-
Criterion(name="test(torch)/latency", summary="mean", compare="<", margin=0.1),
32-
Criterion(name="test(export)/latency", summary="mean", compare="<", margin=0.1),
33-
Criterion(name="test(optimize)/latency", summary="mean", compare="<", margin=0.1),
34-
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
35-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
36-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
19+
Criterion(name="training:epoch", summary="max", compare="<", margin=0.1),
20+
Criterion(name="training:e2e_time", summary="max", compare="<", margin=0.1),
21+
Criterion(name="training:gpu_mem", summary="max", compare="<", margin=0.1),
22+
Criterion(name="training:val/accuracy", summary="max", compare=">", margin=0.1),
23+
Criterion(name="torch:test/accuracy", summary="max", compare=">", margin=0.1),
24+
Criterion(name="export:test/accuracy", summary="max", compare=">", margin=0.1),
25+
Criterion(name="optimize:test/accuracy", summary="max", compare=">", margin=0.1),
26+
Criterion(name="training:train/iter_time", summary="mean", compare="<", margin=0.1),
27+
Criterion(name="torch:test/iter_time", summary="mean", compare="<", margin=0.1),
28+
Criterion(name="export:test/iter_time", summary="mean", compare="<", margin=0.1),
29+
Criterion(name="optimize:test/iter_time", summary="mean", compare="<", margin=0.1),
30+
Criterion(name="optimize:e2e_time", summary="mean", compare="<", margin=0.1),
31+
Criterion(name="torch:test/latency", summary="mean", compare="<", margin=0.1),
32+
Criterion(name="export:test/latency", summary="mean", compare="<", margin=0.1),
33+
Criterion(name="optimize:test/latency", summary="mean", compare="<", margin=0.1),
34+
Criterion(name="torch:test/e2e_time", summary="max", compare=">", margin=0.1),
35+
Criterion(name="export:test/e2e_time", summary="max", compare=">", margin=0.1),
36+
Criterion(name="optimize:test/e2e_time", summary="max", compare=">", margin=0.1),
3737
]
3838

3939

tests/perf_v2/tasks/detection.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,22 +47,22 @@
4747
]
4848

4949
BENCHMARK_CRITERIA = [
50-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
51-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
52-
Criterion(name="train/gpu_mem", summary="max", compare="<", margin=0.1),
53-
Criterion(name="val/f1-score", summary="max", compare=">", margin=0.1),
54-
Criterion(name="test/f1-score", summary="max", compare=">", margin=0.1),
55-
Criterion(name="export/f1-score", summary="max", compare=">", margin=0.1),
56-
Criterion(name="optimize/f1-score", summary="max", compare=">", margin=0.1),
57-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
58-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
59-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
60-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
61-
Criterion(name="optimize/e2e_time", summary="mean", compare="<", margin=0.1),
62-
Criterion(name="test(torch)/latency", summary="mean", compare="<", margin=0.1),
63-
Criterion(name="test(export)/latency", summary="mean", compare="<", margin=0.1),
64-
Criterion(name="test(optimize)/latency", summary="mean", compare="<", margin=0.1),
65-
Criterion(name="test(torch)/e2e_time", summary="max", compare=">", margin=0.1),
66-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
67-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
50+
Criterion(name="training:epoch", summary="max", compare="<", margin=0.1),
51+
Criterion(name="training:e2e_time", summary="max", compare="<", margin=0.1),
52+
Criterion(name="training:gpu_mem", summary="max", compare="<", margin=0.1),
53+
Criterion(name="training:train/iter_time", summary="mean", compare="<", margin=0.1),
54+
Criterion(name="training:val/f1-score", summary="max", compare=">", margin=0.1),
55+
Criterion(name="torch:test/f1-score", summary="max", compare=">", margin=0.1),
56+
Criterion(name="export:test/f1-score", summary="max", compare=">", margin=0.1),
57+
Criterion(name="optimize:test/f1-score", summary="max", compare=">", margin=0.1),
58+
Criterion(name="torch:test/iter_time", summary="mean", compare="<", margin=0.1),
59+
Criterion(name="export:test/iter_time", summary="mean", compare="<", margin=0.1),
60+
Criterion(name="optimize:test/iter_time", summary="mean", compare="<", margin=0.1),
61+
Criterion(name="optimize:e2e_time", summary="mean", compare="<", margin=0.1),
62+
Criterion(name="torch:test/latency", summary="mean", compare="<", margin=0.1),
63+
Criterion(name="export:test/latency", summary="mean", compare="<", margin=0.1),
64+
Criterion(name="optimize:test/latency", summary="mean", compare="<", margin=0.1),
65+
Criterion(name="torch:test/e2e_time", summary="max", compare=">", margin=0.1),
66+
Criterion(name="export:test/e2e_time", summary="max", compare=">", margin=0.1),
67+
Criterion(name="optimize:test/e2e_time", summary="max", compare=">", margin=0.1),
6868
]

tests/perf_v2/tasks/instance_segmentation.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -63,22 +63,22 @@
6363
]
6464

6565
BENCHMARK_CRITERIA = [
66-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
67-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
68-
Criterion(name="train/gpu_mem", summary="max", compare="<", margin=0.1),
69-
Criterion(name="val/f1-score", summary="max", compare=">", margin=0.1),
70-
Criterion(name="test/f1-score", summary="max", compare=">", margin=0.1),
71-
Criterion(name="export/f1-score", summary="max", compare=">", margin=0.1),
72-
Criterion(name="optimize/f1-score", summary="max", compare=">", margin=0.1),
73-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
74-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
75-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
76-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
77-
Criterion(name="optimize/e2e_time", summary="mean", compare="<", margin=0.1),
78-
Criterion(name="test(torch)/latency", summary="mean", compare="<", margin=0.1),
79-
Criterion(name="test(export)/latency", summary="mean", compare="<", margin=0.1),
80-
Criterion(name="test(optimize)/latency", summary="mean", compare="<", margin=0.1),
81-
Criterion(name="test(torch)/e2e_time", summary="max", compare=">", margin=0.1),
82-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
83-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
66+
Criterion(name="training:epoch", summary="max", compare="<", margin=0.1),
67+
Criterion(name="training:e2e_time", summary="max", compare="<", margin=0.1),
68+
Criterion(name="training:gpu_mem", summary="max", compare="<", margin=0.1),
69+
Criterion(name="training:val/f1-score", summary="max", compare=">", margin=0.1),
70+
Criterion(name="torch:test/f1-score", summary="max", compare=">", margin=0.1),
71+
Criterion(name="export:test/f1-score", summary="max", compare=">", margin=0.1),
72+
Criterion(name="optimize:test/f1-score", summary="max", compare=">", margin=0.1),
73+
Criterion(name="training:train/iter_time", summary="mean", compare="<", margin=0.1),
74+
Criterion(name="torch:test/iter_time", summary="mean", compare="<", margin=0.1),
75+
Criterion(name="export:test/iter_time", summary="mean", compare="<", margin=0.1),
76+
Criterion(name="optimize:test/iter_time", summary="mean", compare="<", margin=0.1),
77+
Criterion(name="optimize:e2e_time", summary="mean", compare="<", margin=0.1),
78+
Criterion(name="torch:test/latency", summary="mean", compare="<", margin=0.1),
79+
Criterion(name="export:test/latency", summary="mean", compare="<", margin=0.1),
80+
Criterion(name="optimize:test/latency", summary="mean", compare="<", margin=0.1),
81+
Criterion(name="torch:test/e2e_time", summary="max", compare=">", margin=0.1),
82+
Criterion(name="export:test/e2e_time", summary="max", compare=">", margin=0.1),
83+
Criterion(name="optimize:test/e2e_time", summary="max", compare=">", margin=0.1),
8484
]

tests/perf_v2/tasks/keypoint_detection.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,17 @@
4444

4545
# TODO (someone): align with detection task (adding gpu_mem, latency, optimize/e2e, etc)
4646
BENCHMARK_CRITERIA = [
47-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
48-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
49-
Criterion(name="val/PCK", summary="max", compare=">", margin=0.1),
50-
Criterion(name="test/PCK", summary="max", compare=">", margin=0.1),
51-
Criterion(name="export/PCK", summary="max", compare=">", margin=0.1),
52-
Criterion(name="optimize/PCK", summary="max", compare=">", margin=0.1),
53-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
54-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
55-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
56-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
57-
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
58-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
59-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
47+
Criterion(name="training:epoch", summary="max", compare="<", margin=0.1),
48+
Criterion(name="training:e2e_time", summary="max", compare="<", margin=0.1),
49+
Criterion(name="training:val/PCK", summary="max", compare=">", margin=0.1),
50+
Criterion(name="torch:test/PCK", summary="max", compare=">", margin=0.1),
51+
Criterion(name="export:test/PCK", summary="max", compare=">", margin=0.1),
52+
Criterion(name="optimize:test/PCK", summary="max", compare=">", margin=0.1),
53+
Criterion(name="training:train/iter_time", summary="mean", compare="<", margin=0.1),
54+
Criterion(name="torch:test/iter_time", summary="mean", compare="<", margin=0.1),
55+
Criterion(name="export:test/iter_time", summary="mean", compare="<", margin=0.1),
56+
Criterion(name="optimize:test/iter_time", summary="mean", compare="<", margin=0.1),
57+
Criterion(name="torch:test/e2e_time", summary="max", compare=">", margin=0.1),
58+
Criterion(name="export:test/e2e_time", summary="max", compare=">", margin=0.1),
59+
Criterion(name="optimize:test/e2e_time", summary="max", compare=">", margin=0.1),
6060
]

tests/perf_v2/tasks/semantic_segmentation.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -52,17 +52,17 @@
5252

5353
# TODO (someone): align with detection task (adding gpu_mem, latency, optimize/e2e, etc)
5454
BENCHMARK_CRITERIA = [
55-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
56-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
57-
Criterion(name="val/Dice", summary="max", compare=">", margin=0.1),
58-
Criterion(name="test/Dice", summary="max", compare=">", margin=0.1),
59-
Criterion(name="export/Dice", summary="max", compare=">", margin=0.1),
60-
Criterion(name="optimize/Dice", summary="max", compare=">", margin=0.1),
61-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
62-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
63-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
64-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
65-
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
66-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
67-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
55+
Criterion(name="training:epoch", summary="max", compare="<", margin=0.1),
56+
Criterion(name="training:e2e_time", summary="max", compare="<", margin=0.1),
57+
Criterion(name="training:val/Dice", summary="max", compare=">", margin=0.1),
58+
Criterion(name="torch:test/Dice", summary="max", compare=">", margin=0.1),
59+
Criterion(name="export:test/Dice", summary="max", compare=">", margin=0.1),
60+
Criterion(name="optimize:test/Dice", summary="max", compare=">", margin=0.1),
61+
Criterion(name="training:train/iter_time", summary="mean", compare="<", margin=0.1),
62+
Criterion(name="torch:test/iter_time", summary="mean", compare="<", margin=0.1),
63+
Criterion(name="export:test/iter_time", summary="mean", compare="<", margin=0.1),
64+
Criterion(name="optimize:test/iter_time", summary="mean", compare="<", margin=0.1),
65+
Criterion(name="torch:test/e2e_time", summary="max", compare=">", margin=0.1),
66+
Criterion(name="export:test/e2e_time", summary="max", compare=">", margin=0.1),
67+
Criterion(name="optimize:test/e2e_time", summary="max", compare=">", margin=0.1),
6868
]

0 commit comments

Comments
 (0)