Skip to content

Commit 4a82a3f

update docstrings, increment version
1 parent 03e1d3f commit 4a82a3f

9 files changed: +98 -42 lines changed


docs/docs/index.md

Lines changed: 1 addition & 7 deletions
@@ -118,18 +118,12 @@ python sotabench.py

You can also run the logic in a Jupyter Notebook if that is your preferred workflow.

-To verify your benchmark will run and all parameters are correct you can use the included CLI checking tool:
-
-```
-$ sb check
-```
-
**Step Three : Login and connect your repository to [sotabench](http://www.sotabench.com)**

Create an account on [sotabench](http://www.sotabench.com), then head to your user page. Click the
**Connect a GitHub repository** button:

-![SotaBench](img/connect.png)
+<img width=400 src="img/connect.png">

Then follow the steps to connect the repositories that you wish to benchmark:

docs/mkdocs.yml

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,8 @@ theme:
  palette:
    primary: 'cyan'
    accent: 'cyan'
+  logo:
+    icon: 'explore'
markdown_extensions:
  - admonition
  - codehilite

sotabencheval/core/evaluator.py

Lines changed: 3 additions & 2 deletions
@@ -125,7 +125,9 @@ def cache_exists(self):

evaluator.save()

-:return: bool or None (if not in check mode)
+This logic is for the server; it will not break the loop if you evaluate locally.
+
+:return: bool or None (if not on server)
"""

if not is_server(): # we only check the cache on the server
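A minimal sketch of the check-then-save pattern this docstring describes; `evaluator` and `first_batch_outputs` are illustrative assumptions, not part of this commit:

```python
# Sketch only: `evaluator` is any sotabencheval evaluator exposing .add(),
# .cache_exists and .save(); `first_batch_outputs` stands in for real results.
evaluator.add(first_batch_outputs)  # cache_exists requires at least one processed batch

if evaluator.cache_exists:          # only ever truthy on the sotabench server
    evaluator.save()                # reuses the cached results; remaining batches can be skipped
```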
@@ -225,7 +227,6 @@ def save(self, **kwargs):

:return: BenchmarkResult object with results and metadata
"""
-
# recalculate to ensure no mistakes made during batch-by-batch metric calculation
self.get_results()

sotabencheval/image_classification/imagenet.py

Lines changed: 35 additions & 14 deletions
@@ -1,4 +1,5 @@
# Some of the processing logic here is based on the torchvision ImageNet dataset
+# https://github.com/pytorch/vision/blob/master/torchvision/datasets/imagenet.py

import numpy as np
import os
@@ -12,6 +13,7 @@
from sotabencheval.utils import get_max_memory_allocated
from sotabencheval.image_classification.utils import top_k_accuracy_score

+
ARCHIVE_DICT = {
'labels': {
'url': 'https://github.com/paperswithcode/sotabench-eval/releases/download/0.01/imagenet_val_targets.pkl',
@@ -37,6 +39,7 @@ class ImageNetEvaluator(object):
from torch.utils.data import DataLoader

from sotabencheval.image_classification import ImageNetEvaluator
+from sotabencheval.utils import is_server

if is_server():
DATA_ROOT = './.data/vision/imagenet'
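The example in the class docstring switches on `is_server()`; a minimal sketch of the full switch, where the local path is an assumption for illustration:

```python
from sotabencheval.utils import is_server

# On sotabench.com the data lives in a fixed location; locally, point the
# evaluator at your own copy (the local path below is an assumption).
if is_server():
    DATA_ROOT = './.data/vision/imagenet'
else:
    DATA_ROOT = '/home/user/data/imagenet'
```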
@@ -107,11 +110,12 @@ def __init__(self,
paper_pwc_id: str = None,
paper_results: dict = None,
model_description=None,):
-"""Benchmarking function.
+"""Initializes an ImageNet Evaluator object

Args:
root (string): Root directory of the ImageNet Dataset - where the
-label data is located (or will be downloaded to).
+label data is located (or will be downloaded to). Note this does not download
+the full ImageNet dataset (!) but just annotation information.
model_name (str, optional): The name of the model from the
paper - if you want to link your build to a model from a
machine learning paper. See the ImageNet benchmark page for model names,
@@ -135,11 +139,12 @@ def __init__(self,
'Top 5 Accuracy'.
model_description (str, optional): Optional model description.
"""
-
root = self.root = os.path.expanduser(change_root_if_server(
root=root,
server_root="./.data/vision/imagenet"))

+# Model metadata
+
self.model_name = model_name
self.paper_arxiv_id = paper_arxiv_id
self.paper_pwc_id = paper_pwc_id
@@ -148,15 +153,19 @@ def __init__(self,

self.top1 = AverageMeter()
self.top5 = AverageMeter()
-
self.load_targets()

self.outputs = {}
self.results = None
+
+# Backend variables for hashing and caching
+
self.first_batch_processed = False
self.batch_hash = None
self.cached_results = False

+# Speed and memory metrics
+
self.speed_mem_metrics = {}
self.init_time = time.time()

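A minimal sketch of constructing the evaluator these `__init__` docstring changes document; every value below (model name, arXiv ID, metric numbers) is a placeholder rather than a claim about any paper:

```python
from sotabencheval.image_classification import ImageNetEvaluator

evaluator = ImageNetEvaluator(
    root='./.data/vision/imagenet',        # only label/annotation data is downloaded here
    model_name='My Model',                 # placeholder
    paper_arxiv_id='0000.00000',           # placeholder
    paper_results={'Top 1 Accuracy': 0.0,  # placeholder values; the keys follow the
                   'Top 5 Accuracy': 0.0}, # metric names named in the docstring
    model_description='Optional free-text description of the submission.',
)
```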
@@ -167,9 +176,13 @@ def cache_exists(self):
then sets self.results to cached results and returns True.

You can use this property for control flow to break a for loop over a dataset
-after the first iteration. This prevents rerunning the same calculation for the
+after the first iteration. This prevents re-running the same calculation for the
same model twice.

+Q: Why should the user use this?
+A: If you want fast "continuous evaluation" and don't want to avoid rerunning the same model over and over
+each time you commit something new to your repository.
+
Examples:
Breaking a for loop for a PyTorch evaluation

@@ -192,9 +205,10 @@ def cache_exists(self):

evaluator.save() # uses the cached results

-:return:
-"""
+This logic is for the server; it will not break the loop if you evaluate locally.

+:return: bool or None (if not in check mode)
+"""
if not self.first_batch_processed:
raise ValueError('No batches of data have been processed so no batch_hash exists')

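A minimal sketch of the loop-breaking pattern the docstring's PyTorch example refers to; `model`, `data_loader`, and the way validation image IDs are recovered are assumptions standing in for a real evaluation setup:

```python
# Sketch only: assumes a PyTorch model in eval mode and a DataLoader over the
# ImageNet validation set, with the validation image IDs known in loader order.
image_ids = [...]  # ImageNet validation IDs in loader order; deliberately elided

for i, (input, target) in enumerate(data_loader):
    output = model(input)
    batch_ids = image_ids[i * data_loader.batch_size:(i + 1) * data_loader.batch_size]
    evaluator.add(dict(zip(batch_ids, output.detach().cpu().numpy())))
    if evaluator.cache_exists:   # only ever truthy on the sotabench server
        break                    # cached results will be reused by save()

evaluator.save()
```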
@@ -217,7 +231,8 @@ def cache_exists(self):

def load_targets(self):
"""
-Downloads ImageNet labels and IDs and puts into self.root, then loads at self.targets
+Downloads ImageNet labels and IDs and puts into self.root, then loads to self.targets
+
:return: void - update self.targets with the ImageNet validation data labels, and downloads if
the pickled validation data is not in the root location
"""
@@ -233,7 +248,7 @@ def add(self, output_dict: dict):
"""
Updates the evaluator with new results

-:param output_dict (dict): Where keys are image IDs, and each value should be an 1D np.ndarray of size 1000
+:param output_dict: (dict) Where keys are image IDs, and each value should be an 1D np.ndarray of size 1000
containing logits for that image ID.
:return: void - updates self.outputs with the new IDSs and prediction

@@ -245,7 +260,6 @@ def add(self, output_dict: dict):
my_evaluator.add({'ILSVRC2012_val_00000293': np.array([1.04243, ...]),
'ILSVRC2012_val_00000294': np.array([-2.3677, ...])})
"""
-
if not output_dict:
print('Empty output_dict; will not process')
return
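A small illustration of the input format `add` expects, using the evaluator from the construction sketch above and a random vector as a stand-in for real model logits:

```python
import numpy as np

logits = np.random.randn(1000)                      # stand-in for a model's 1000-class logits
evaluator.add({'ILSVRC2012_val_00000001': logits})  # one entry per ImageNet validation image ID
```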
@@ -312,21 +326,28 @@ def get_results(self):
return self.results

def reset_time(self):
+"""
+Simple method to reset the timer self.init_time. Often used before a loop, to time the evaluation
+appropriately, for example:
+
+:return: void - resets self.init_time
+"""
self.init_time = time.time()

def save(self):
"""
-Calculate results and then puts into a BenchmarkResult object
+Calculate results and then put into a BenchmarkResult object

-On the sotabench.com server, this will produce a JSON file serialisation and results will be recorded
-on the platform.
+On the sotabench.com server, this will produce a JSON file serialisation in sotabench_results.json and results
+will be recorded on the platform.

:return: BenchmarkResult object with results and metadata
"""
-
# recalculate to ensure no mistakes made during batch-by-batch metric calculation
self.get_results()

+# If this is the first time the model is run, then we record evaluation time information
+
if not self.cached_results:
exec_speed = (time.time() - self.init_time)
self.speed_mem_metrics['Tasks / Evaluation Time'] = len(self.outputs) / exec_speed
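The new `reset_time` docstring stops short of its example; a minimal sketch of the usage it appears to intend, with the loop and helper names as assumptions:

```python
evaluator.reset_time()               # start timing immediately before the evaluation loop
for batch in data_loader:            # hypothetical loop over the validation data
    evaluator.add(evaluate(batch))   # hypothetical helper producing the output dict
evaluator.save()                     # evaluation time is measured from reset_time()
```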

sotabencheval/image_classification/utils.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ def top_k_accuracy_score(y_true, y_pred, k=5, normalize=True):
"""

if len(y_true.shape) == 2:
-y_true = y_true[0] # should be one-dimensional
+y_true = y_true[0]  # should be one-dimensional

num_obs, num_labels = y_pred.shape

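A small usage example for the function touched above, assuming the conventional top-k semantics (the fraction of rows whose true label appears among the k highest-scoring classes):

```python
import numpy as np
from sotabencheval.image_classification.utils import top_k_accuracy_score

y_true = np.array([2, 0])                      # true class indices, one per observation
y_pred = np.array([[0.1, 0.2, 0.7, 0.0, 0.0],  # per-class scores, shape (num_obs, num_labels)
                   [0.5, 0.3, 0.1, 0.1, 0.0]])

print(top_k_accuracy_score(y_true, y_pred, k=2))  # 1.0 here: each true class is in its row's top 2
```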
sotabencheval/object_detection/coco.py

Lines changed: 23 additions & 8 deletions
@@ -1,4 +1,5 @@
# Some of the processing logic here is based on the torchvision COCO dataset
+# https://github.com/pytorch/vision/blob/master/torchvision/datasets/coco.py

import copy
import numpy as np
@@ -50,7 +51,7 @@ def __init__(self,
paper_pwc_id: str = None,
paper_results: dict = None,
model_description=None,):
-"""Benchmarking function.
+"""Initializes a COCO Evaluator object

Args:
root (string): Root directory of the COCO Dataset - where the
@@ -80,10 +81,11 @@ def __init__(self,
'AP75', 'APS', 'APM', 'APL'
model_description (str, optional): Optional model description.
"""
-
root = self.root = change_root_if_server(root=root,
server_root="./.data/vision/coco")

+# Model metadata
+
self.model_name = model_name
self.paper_arxiv_id = paper_arxiv_id
self.paper_pwc_id = paper_pwc_id
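A minimal sketch of constructing the evaluator this `__init__` documents, assuming the class is exported as `COCOEvaluator` from `sotabencheval.object_detection`; the metric keys follow the Args text above and all values are placeholders:

```python
from sotabencheval.object_detection import COCOEvaluator

evaluator = COCOEvaluator(
    root='./.data/vision/coco',
    model_name='My Detector',                # placeholder
    paper_arxiv_id='0000.00000',             # placeholder
    paper_results={'AP75': 0.0, 'APS': 0.0,  # placeholder values; keys follow the
                   'APM': 0.0, 'APL': 0.0},  # metric names listed in the docstring
)
```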
@@ -103,15 +105,25 @@ def __init__(self,

self.detections = []
self.results = None
+
+# Backend variables for hashing and caching
+
self.first_batch_processed = False
self.batch_hash = None
self.cached_results = False

-self.speed_mem_metrics = {}
+# Speed and memory metrics

+self.speed_mem_metrics = {}
self.init_time = time.time()

def _download(self, annFile):
+"""
+Utility function for downloading the COCO annotation file
+
+:param annFile: path of the annotations file
+:return: void - extracts the archive
+"""
if not os.path.isdir(annFile):
if "2017" in annFile:
annotations_dir_zip = os.path.join(
@@ -159,7 +171,6 @@ def cache_exists(self):

:return: bool or None (if not in check mode)
"""
-
if not self.first_batch_processed:
raise ValueError('No batches of data have been processed so no batch_hash exists')

@@ -207,7 +218,6 @@ def cache_values(self, annotations, metrics):
:param metrics: dictionary of final AP metrics
:return: list of data (combining annotations and metrics)
"""
-
metrics = {k: np.round(v, 3) for k, v in metrics.items()}
new_annotations = copy.deepcopy(annotations)
new_annotations = [self.cache_format_ann(ann) for ann in new_annotations]
@@ -236,7 +246,6 @@ def add(self, detections: list):
my_evaluator.add([{'image_id': 397133, 'bbox': [386.1628112792969, 69.48855590820312,
110.14895629882812, 278.2847595214844], 'score': 0.999152421951294, 'category_id': 1}])
"""
-
self.detections.extend(detections)

self.coco_evaluator.update(detections)
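A sketch of feeding a detection model's outputs into `add`, following the format shown in the docstring example; `run_inference` and the values it yields are assumptions for illustration:

```python
# Sketch only: run_inference is a hypothetical helper yielding, per image, the COCO
# image_id plus predicted boxes as [x, y, width, height], confidence scores and category IDs.
for image_id, boxes, scores, labels in run_inference(data_loader):
    evaluator.add([
        {'image_id': image_id,
         'bbox': list(box),
         'score': float(score),
         'category_id': int(label)}
        for box, score, label in zip(boxes, scores, labels)
    ])
    if evaluator.cache_exists:   # server-side shortcut, as with the other evaluators
        break

evaluator.save()
```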
@@ -256,7 +265,6 @@ def get_results(self):

:return: dict with COCO AP metrics
"""
-
if self.cached_results:
return self.results

@@ -272,6 +280,12 @@ def get_results(self):
return self.results

def reset_time(self):
+"""
+Simple method to reset the timer self.init_time. Often used before a loop, to time the evaluation
+appropriately, for example:
+
+:return: void - resets self.init_time
+"""
self.init_time = time.time()

def save(self):
@@ -283,10 +297,11 @@ def save(self):

:return: BenchmarkResult object with results and metadata
"""
-
# recalculate to ensure no mistakes made during batch-by-batch metric calculation
self.get_results()

+# If this is the first time the model is run, then we record evaluation time information
+
if not self.cached_results:
unique_image_ids = set([d['image_id'] for d in self.detections])
exec_speed = (time.time() - self.init_time)
