
Commit a0759ce

Merge pull request #6 from paperswithcode/new_timing

new timing

2 parents: 95290bd + a743dc8

File tree: 10 files changed, +64 −160 lines
Two binary files changed (151 Bytes and 913 Bytes): contents not shown.

sotabencheval/core/evaluator.py

Lines changed: 6 additions & 9 deletions
@@ -2,7 +2,7 @@
 
 from sotabenchapi.client import Client
 from sotabenchapi.core import BenchmarkResult
-from sotabencheval.utils import is_server, AverageMeter
+from sotabencheval.utils import is_server
 from sotabencheval.core.cache import cache_value
 
 
@@ -25,14 +25,8 @@ def __init__(self,
         self.results = None
         self._cache_exists = None
 
-        self.inference_time = AverageMeter()
-        self.start_time = time.time()
-        self.speed_mem_metrics = {
-            'Tasks Per Second (Partial)': None,
-            'Tasks Per Second (Total)': None,
-            'Memory Allocated (Partial)': None,
-            'Memory Allocated (Total)': None
-        }
+        self.init_time = time.time()
+        self.speed_mem_metrics = {}
 
     @property
     def cache_exists(self):
@@ -92,6 +86,9 @@ def cache_exists(self):
     def cache_values(self, **kwargs):
         return cache_value(kwargs)
 
+    def reset_time(self):
+        self.init_time = time.time()
+
     def save(self, **kwargs):
         """
         Calculate results and then put into a BenchmarkResult object
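
Note: the hunks above replace the old AverageMeter bookkeeping with a single init_time stamp set in __init__, a reset_time() method to restart the clock, and an 'Evaluation Time' entry computed at save time. A minimal, runnable sketch of the scheme, using a toy TimedEvaluator that stands in for the real base evaluator (the class and the dummy predictions are illustrative, not part of this diff):

import time

class TimedEvaluator:
    def __init__(self):
        self.speed_mem_metrics = {}
        self.init_time = time.time()  # clock starts at construction
        self.outputs = {}

    def reset_time(self):
        # restart the clock, e.g. after downloads or cache checks,
        # so setup cost is excluded from the reported figure
        self.init_time = time.time()

    def add(self, output_dict):
        self.outputs.update(output_dict)

    def save(self):
        # examples processed per second since init_time, stored under
        # the 'Evaluation Time' key as in the hunks above
        elapsed = time.time() - self.init_time
        self.speed_mem_metrics['Evaluation Time'] = len(self.outputs) / elapsed
        return self.speed_mem_metrics

evaluator = TimedEvaluator()
evaluator.reset_time()  # exclude setup from timing
evaluator.add({i: i % 1000 for i in range(5000)})  # dummy predictions
print(evaluator.save())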

sotabencheval/image_classification/imagenet.py

Lines changed: 10 additions & 26 deletions
@@ -157,14 +157,8 @@ def __init__(self,
         self.batch_hash = None
         self.cached_results = False
 
-        self.inference_time = AverageMeter()
-        self.start_time = time.time()
-        self.speed_mem_metrics = {
-            'Tasks Per Second (Partial)': None,
-            'Tasks Per Second (Total)': None,
-            'Memory Allocated (Partial)': None,
-            'Memory Allocated (Total)': None
-        }
+        self.speed_mem_metrics = {}
+        self.init_time = time.time()
 
     @property
     def cache_exists(self):
@@ -207,8 +201,6 @@ def cache_exists(self):
         if not is_server(): # we only check the cache on the server
             return None
 
-        self.speed_mem_metrics['Tasks Per Second (Partial)'] = len(self.outputs)/self.inference_time.sum
-
         client = Client.public()
         cached_res = client.get_results_by_run_hash(self.batch_hash)
         if cached_res:
@@ -237,18 +229,6 @@ def load_targets(self):
         with open(os.path.join(self.root, 'imagenet_val_targets.pkl'), 'rb') as handle:
             self.targets = pickle.load(handle)
 
-    def update_inference_time(self):
-
-        if not self.outputs and self.inference_time.count < 1:
-            # assuming this is the first time the evaluator is called
-            self.inference_time.update(time.time() - self.start_time)
-        elif not self.outputs and self.inference_time.count > 0:
-            # assuming the user has cleared outputs, and is then readding (evaluation post batching)
-            pass
-        else:
-            # if there are outputs and the inference time count is > 0
-            self.inference_time.update(time.time() - self.start_time)
-
     def add(self, output_dict: dict):
         """
         Updates the evaluator with new results
@@ -270,7 +250,6 @@ def add(self, output_dict: dict):
             print('Empty output_dict; will not process')
             return
 
-        self.update_inference_time()
         self.outputs = dict(list(self.outputs.items()) + list(output_dict.items()))
 
         for i, dict_key in enumerate(output_dict.keys()):
@@ -288,8 +267,6 @@ def add(self, output_dict: dict):
             self.batch_hash = calculate_batch_hash(hash_dict)
             self.first_batch_processed = True
 
-        self.start_time = time.time()
-
     def get_results(self):
         """
         Gets the results for the evaluator. This method only runs if predictions for all 5,000 ImageNet validation
@@ -330,11 +307,13 @@ def get_results(self):
             self.top5.update(prec5, 1)
 
         self.results = {'Top 1 Accuracy': self.top1.avg, 'Top 5 Accuracy': self.top5.avg}
-        self.speed_mem_metrics['Tasks Per Second (Total)'] = len(self.outputs) / self.inference_time.sum
         self.speed_mem_metrics['Max Memory Allocated (Total)'] = get_max_memory_allocated()
 
         return self.results
 
+    def reset_time(self):
+        self.init_time = time.time()
+
     def save(self):
         """
         Calculate results and then puts into a BenchmarkResult object
@@ -348,6 +327,11 @@ def save(self):
         # recalculate to ensure no mistakes made during batch-by-batch metric calculation
         self.get_results()
 
+        if not self.cached_results:
+            self.speed_mem_metrics['Evaluation Time'] = len(self.outputs) / (time.time() - self.init_time)
+        else:
+            self.speed_mem_metrics['Evaluation Time'] = None
+
         return BenchmarkResult(
             task=self.task,
             config={},

sotabencheval/machine_translation/wmt.py

Lines changed: 7 additions & 19 deletions
@@ -8,6 +8,7 @@
 from enum import Enum
 import time
 
+
 class WMTDataset(Enum):
     News2014 = "newstest2014"
     News2019 = "newstest2019"
@@ -52,8 +53,6 @@ def __init__(self,
 
         self.metrics = TranslationMetrics(self.source_dataset_path, self.target_dataset_path)
 
-        self.start_time = time.time()
-
     def _get_source_dataset_filename(self):
         if self.dataset == WMTDataset.News2014:
             other_lang = self.source_lang.value if self.target_lang == Language.English else self.target_lang.value
@@ -78,47 +77,36 @@ def _get_dataset_name(self):
         ds_names = {WMTDataset.News2014: "WMT2014", WMTDataset.News2019: "WMT2019"}
         return "{0} {1}-{2}".format(ds_names.get(self.dataset), self.source_lang.fullname, self.target_lang.fullname)
 
-    def update_inference_time(self):
-
-        if not self.metrics._results and self.inference_time.count < 1:
-            # assuming this is the first time the evaluator is called
-            self.inference_time.update(time.time() - self.start_time)
-        elif not self.metrics._results and self.inference_time.count > 0:
-            # assuming the user has reset outputs, and is then readding (evaluation post batching)
-            pass
-        else:
-            # if there are outputs and the inference time count is > 0
-            self.inference_time.update(time.time() - self.start_time)
 
     def add(self, answers: Dict[str, str]):
 
-        self.update_inference_time()
-
         self.metrics.add(answers)
 
         if not self.first_batch_processed and self.metrics.has_data:
-            self.speed_mem_metrics['Tasks Per Second (Partial)'] = len(self.metrics.answers) / self.inference_time.sum
             self.batch_hash = calculate_batch_hash(
                 self.cache_values(answers=self.metrics.answers,
                                   metrics=self.metrics.get_results(ignore_missing=True))
             )
             self.first_batch_processed = True
 
-        self.start_time = time.time()
-
     def reset(self):
         self.metrics.reset()
 
     def get_results(self):
         if self.cached_results:
             return self.results
         self.results = self.metrics.get_results()
-        self.speed_mem_metrics['Tasks Per Second (Total)'] = len(self.metrics.answers) / self.inference_time.sum
         self.speed_mem_metrics['Max Memory Allocated (Total)'] = get_max_memory_allocated()
 
         return self.results
 
     def save(self):
         dataset = self._get_dataset_name()
+
+        if not self.cached_results:
+            self.speed_mem_metrics['Evaluation Time'] = len(self.metrics.answers) / (time.time() - self.init_time)
+        else:
+            self.speed_mem_metrics['Evaluation Time'] = None
+
         return super().save(dataset=dataset)
 
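Note: wmt.py reads self.init_time in save() but never sets it; the attribute comes from the base evaluator's __init__ (see sotabencheval/core/evaluator.py above), so subclasses must run the base constructor before timing starts. A runnable sketch of that dependency, with hypothetical class names standing in for the real ones:

import time

class BaseEvaluator:  # stands in for the core evaluator above
    def __init__(self):
        self.init_time = time.time()  # set once, in the base class
        self.speed_mem_metrics = {}
        self.cached_results = False

class WMTStyleEvaluator(BaseEvaluator):  # hypothetical subclass name
    def __init__(self):
        super().__init__()  # inherits init_time and speed_mem_metrics
        self.answers = {'sent-1': 'a hypothetical translation'}

    def save(self):
        if not self.cached_results:
            elapsed = time.time() - self.init_time
            self.speed_mem_metrics['Evaluation Time'] = len(self.answers) / elapsed
        else:
            self.speed_mem_metrics['Evaluation Time'] = None
        return self.speed_mem_metrics

print(WMTStyleEvaluator().save())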

sotabencheval/object_detection/coco.py

Lines changed: 12 additions & 30 deletions
@@ -9,7 +9,7 @@
 import time
 
 from sotabencheval.utils import calculate_batch_hash, extract_archive, change_root_if_server, is_server
-from sotabencheval.utils import AverageMeter, get_max_memory_allocated
+from sotabencheval.utils import get_max_memory_allocated
 from sotabencheval.object_detection.coco_eval import CocoEvaluator
 from sotabencheval.object_detection.utils import get_coco_metrics
 
@@ -107,15 +107,9 @@ def __init__(self,
         self.batch_hash = None
         self.cached_results = False
 
-        self.inference_time = AverageMeter()
-        self.start_time = time.time()
-        self.speed_mem_metrics = {
-            'Tasks Per Second (Partial)': None,
-            'Tasks Per Second (Total)': None,
-            'Memory Allocated (Partial)': None,
-            'Memory Allocated (PartTotalial)': None
-        }
+        self.speed_mem_metrics = {}
 
+        self.init_time = time.time()
 
     def _download(self, annFile):
         if not os.path.isdir(annFile):
@@ -172,9 +166,6 @@ def cache_exists(self):
         if not is_server(): # we only check the cache on the server
             return None
 
-        unique_image_ids = set([d['image_id'] for d in self.detections])
-        self.speed_mem_metrics['Tasks Per Second (Partial)'] = len(unique_image_ids)/self.inference_time.sum
-
         client = Client.public()
         cached_res = client.get_results_by_run_hash(self.batch_hash)
         if cached_res:
@@ -223,18 +214,6 @@ def cache_values(self, annotations, metrics):
 
         return new_annotations + [metrics]
 
-    def update_inference_time(self):
-
-        if not self.detections and self.inference_time.count < 1:
-            # assuming this is the first time the evaluator is called
-            self.inference_time.update(time.time() - self.start_time)
-        elif not self.detections and self.inference_time.count > 0:
-            # assuming the user has reset outputs, and is then readding (evaluation post batching)
-            pass
-        else:
-            # if there are outputs and the inference time count is > 0
-            self.inference_time.update(time.time() - self.start_time)
-
     def add(self, detections: list):
         """
         Update the evaluator with new detections
@@ -258,8 +237,6 @@ def add(self, detections: list):
             110.14895629882812, 278.2847595214844], 'score': 0.999152421951294, 'category_id': 1}])
         """
 
-        self.update_inference_time()
-
         self.detections.extend(detections)
 
         self.coco_evaluator.update(detections)
@@ -273,8 +250,6 @@ def add(self, detections: list):
                 self.cache_values(annotations=detections, metrics=get_coco_metrics(self.coco_evaluator)))
             self.first_batch_processed = True
 
-        self.start_time = time.time()
-
     def get_results(self):
         """
         Reruns the evaluation using the accumulated detections, returns COCO results with AP metrics
@@ -292,12 +267,13 @@ def get_results(self):
         self.coco_evaluator.summarize()
 
         self.results = get_coco_metrics(self.coco_evaluator)
-        unique_image_ids = set([d['image_id'] for d in self.detections])
-        self.speed_mem_metrics['Tasks Per Second (Total)'] = len(unique_image_ids) / self.inference_time.sum
         self.speed_mem_metrics['Max Memory Allocated (Total)'] = get_max_memory_allocated()
 
         return self.results
 
+    def reset_time(self):
+        self.init_time = time.time()
+
     def save(self):
         """
         Calculate results and then put into a BenchmarkResult object
@@ -311,6 +287,12 @@ def save(self):
         # recalculate to ensure no mistakes made during batch-by-batch metric calculation
         self.get_results()
 
+        if not self.cached_results:
+            unique_image_ids = set([d['image_id'] for d in self.detections])
+            self.speed_mem_metrics['Evaluation Time'] = len(unique_image_ids) / (time.time() - self.init_time)
+        else:
+            self.speed_mem_metrics['Evaluation Time'] = None
+
         return BenchmarkResult(
             task=self.task,
             config={},
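
Note: unlike the other evaluators, the COCO branch divides by the number of unique image ids rather than by raw detections, since a single image usually yields many boxes. A small runnable illustration with made-up detections (not from this repo):

# three detections spanning two distinct validation images
detections = [
    {'image_id': 42, 'score': 0.91, 'category_id': 1},
    {'image_id': 42, 'score': 0.55, 'category_id': 3},
    {'image_id': 43, 'score': 0.88, 'category_id': 1},
]
unique_image_ids = set(d['image_id'] for d in detections)
print(len(detections), len(unique_image_ids))  # 3 boxes, 2 images
# throughput is therefore reported per image, not per box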

sotabencheval/question_answering/squad.py

Lines changed: 6 additions & 20 deletions
@@ -7,7 +7,6 @@
 import json
 import time
 
-
 class SQuADVersion(Enum):
     V11 = 'v1.1'
     V20 = 'v2.0'
@@ -35,48 +34,35 @@ def __init__(self,
 
         self.metrics = SQuADMetrics(self.dataset_path, version)
 
-        self.start_time = time.time()
-
-    def update_inference_time(self):
-
-        if not self.metrics._results and self.inference_time.count < 1:
-            # assuming this is the first time the evaluator is called
-            self.inference_time.update(time.time() - self.start_time)
-        elif not self.metrics._results and self.inference_time.count > 0:
-            # assuming the user has reset outputs, and is then readding (evaluation post batching)
-            pass
-        else:
-            # if there are outputs and the inference time count is > 0
-            self.inference_time.update(time.time() - self.start_time)
-
     def add(self, answers: Dict[str, str]):
-        self.update_inference_time()
         self.metrics.add(answers)
 
         if not self.first_batch_processed and self.metrics.has_data:
-            self.speed_mem_metrics['Tasks Per Second (Partial)'] = len(self.metrics.answers) / self.inference_time.sum
             self.batch_hash = calculate_batch_hash(
                 self.cache_values(answers=self.metrics.answers,
                                   metrics=self.metrics.get_results(ignore_missing=True))
             )
             self.first_batch_processed = True
 
-        self.start_time = time.time()
-
     def reset(self):
         self.metrics.reset()
 
     def get_results(self):
         if self.cached_results:
             return self.results
         self.results = self.metrics.get_results()
-        self.speed_mem_metrics['Tasks Per Second (Total)'] = len(self.metrics.answers) / self.inference_time.sum
         self.speed_mem_metrics['Max Memory Allocated (Total)'] = get_max_memory_allocated()
 
         return self.results
 
     def save(self):
         dataset = "SQuAD{} dev".format(self.metrics.version.value[1:])
+
+        if not self.cached_results:
+            self.speed_mem_metrics['Evaluation Time'] = len(self.metrics.answers) / (time.time() - self.init_time)
+        else:
+            self.speed_mem_metrics['Evaluation Time'] = None
+
         return super().save(dataset=dataset)
 