Skip to content

Commit 4861512

Browse files
authored
Enable benchmark for classification tasks and add anomaly task to workflow (#4260)
* Refactor benchmark criteria for classification tasks and add anomaly task to workflow
1 parent 7bc4908 commit 4861512

File tree

3 files changed

+30
-58
lines changed

3 files changed

+30
-58
lines changed

.github/workflows/perf_benchmark_v2.yaml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,16 +51,17 @@ jobs:
5151
fail-fast: false
5252
matrix:
5353
include:
54+
- task: ANOMALY
5455
- task: DETECTION
55-
# - task: MULTI_CLASS_CLS
56-
# - task: MULTI_LABEL_CLS
57-
# - task: H_LABEL_CLS
56+
- task: MULTI_CLASS_CLS
57+
- task: MULTI_LABEL_CLS
58+
- task: H_LABEL_CLS
5859
# - task: INSTANCE_SEGMENTATION
5960
# - task: SEMANTIC_SEGMENTATION
6061
# - task: VISUAL_PROMPTING
6162

6263
name: Perf-Benchmark-${{ matrix.task }}
63-
runs-on: [otxci02]
64+
runs-on: [self-hosted, linux, x64, dmount-v2]
6465
timeout-minutes: 8640
6566
steps:
6667
- name: Checkout repository

tests/perf_v2/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,9 @@
2121
OTXTaskType.INSTANCE_SEGMENTATION: instance_segmentation.BENCHMARK_CRITERIA,
2222
OTXTaskType.SEMANTIC_SEGMENTATION: semantic_segmentation.BENCHMARK_CRITERIA,
2323
OTXTaskType.ANOMALY: anomaly.BENCHMARK_CRITERIA,
24-
OTXTaskType.MULTI_CLASS_CLS: classification.MULTI_CLASS_BENCHMARK_CRITERIA,
25-
OTXTaskType.MULTI_LABEL_CLS: classification.MULTI_LABEL_BENCHMARK_CRITERIA,
26-
OTXTaskType.H_LABEL_CLS: classification.H_LABEL_CLS_BENCHMARK_CRITERIA,
24+
OTXTaskType.MULTI_CLASS_CLS: classification.CLASSIFICATION_BENCHMARK_CRITERIA,
25+
OTXTaskType.MULTI_LABEL_CLS: classification.CLASSIFICATION_BENCHMARK_CRITERIA,
26+
OTXTaskType.H_LABEL_CLS: classification.CLASSIFICATION_BENCHMARK_CRITERIA,
2727
OTXTaskType.VISUAL_PROMPTING: visual_prompting.BENCHMARK_CRITERIA,
2828
OTXTaskType.KEYPOINT_DETECTION: keypoint_detection.BENCHMARK_CRITERIA,
2929
}

tests/perf_v2/tasks/classification.py

Lines changed: 22 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,28 @@
1515

1616
from otx.core.types.task import OTXTaskType
1717

18+
CLASSIFICATION_BENCHMARK_CRITERIA = [
19+
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
20+
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
21+
Criterion(name="train/gpu_mem", summary="max", compare="<", margin=0.1),
22+
Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
23+
Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
24+
Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
25+
Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
26+
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
27+
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
28+
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
29+
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
30+
Criterion(name="optimize/e2e_time", summary="mean", compare="<", margin=0.1),
31+
Criterion(name="test(torch)/latency", summary="mean", compare="<", margin=0.1),
32+
Criterion(name="test(export)/latency", summary="mean", compare="<", margin=0.1),
33+
Criterion(name="test(optimize)/latency", summary="mean", compare="<", margin=0.1),
34+
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
35+
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
36+
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
37+
]
38+
39+
1840
# ============= Multi-class classification =============
1941

2042
MULTI_CLASS_MODEL_TEST_CASES = [
@@ -51,23 +73,6 @@
5173
),
5274
]
5375

54-
# TODO (someone): Compare with DETECTION CRITERIA and fill in the missing values
55-
MULTI_CLASS_BENCHMARK_CRITERIA = [
56-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
57-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
58-
Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
59-
Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
60-
Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
61-
Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
62-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
63-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
64-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
65-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
66-
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
67-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
68-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
69-
]
70-
7176

7277
# ============= Multi-label classification =============
7378
MULTI_LABEL_MODEL_TEST_CASES = [
@@ -100,23 +105,6 @@
100105
),
101106
]
102107

103-
# TODO (someone): Compare with DETECTION CRITERIA and fill in the missing values
104-
MULTI_LABEL_BENCHMARK_CRITERIA = [
105-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
106-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
107-
Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
108-
Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
109-
Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
110-
Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
111-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
112-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
113-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
114-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
115-
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
116-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
117-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
118-
]
119-
120108

121109
# ============= Hierarchical-label classification =============
122110

@@ -150,20 +138,3 @@
150138
extra_overrides={},
151139
),
152140
]
153-
154-
# TODO (someone): Compare with DETECTION CRITERIA and fill in the missing values
155-
H_LABEL_CLS_BENCHMARK_CRITERIA = [
156-
Criterion(name="train/epoch", summary="max", compare="<", margin=0.1),
157-
Criterion(name="train/e2e_time", summary="max", compare="<", margin=0.1),
158-
Criterion(name="val/accuracy", summary="max", compare=">", margin=0.1),
159-
Criterion(name="test/accuracy", summary="max", compare=">", margin=0.1),
160-
Criterion(name="export/accuracy", summary="max", compare=">", margin=0.1),
161-
Criterion(name="optimize/accuracy", summary="max", compare=">", margin=0.1),
162-
Criterion(name="train/iter_time", summary="mean", compare="<", margin=0.1),
163-
Criterion(name="test/iter_time", summary="mean", compare="<", margin=0.1),
164-
Criterion(name="export/iter_time", summary="mean", compare="<", margin=0.1),
165-
Criterion(name="optimize/iter_time", summary="mean", compare="<", margin=0.1),
166-
Criterion(name="test(train)/e2e_time", summary="max", compare=">", margin=0.1),
167-
Criterion(name="test(export)/e2e_time", summary="max", compare=">", margin=0.1),
168-
Criterion(name="test(optimize)/e2e_time", summary="max", compare=">", margin=0.1),
169-
]

0 commit comments

Comments (0)