Skip to content

Commit 4a82a3f

update docstrings, increment version
1 parent 03e1d3f commit 4a82a3f

9 files changed: +98 -42 lines changed


docs/docs/index.md

Lines changed: 1 addition & 7 deletions
@@ -118,18 +118,12 @@ python sotabench.py

You can also run the logic in a Jupyter Notebook if that is your preferred workflow.

-To verify your benchmark will run and all parameters are correct you can use the included CLI checking tool:
-
-```
-$ sb check
-```
-
**Step Three : Login and connect your repository to [sotabench](http://www.sotabench.com)**

Create an account on [sotabench](http://www.sotabench.com), then head to your user page. Click the
**Connect a GitHub repository** button:

-![SotaBench](img/connect.png)
+<img width=400 src="img/connect.png">

Then follow the steps to connect the repositories that you wish to benchmark:

docs/mkdocs.yml

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,8 @@ theme:
  palette:
    primary: 'cyan'
    accent: 'cyan'
+  logo:
+    icon: 'explore'
markdown_extensions:
  - admonition
  - codehilite

sotabencheval/core/evaluator.py

Lines changed: 3 additions & 2 deletions
@@ -125,7 +125,9 @@ def cache_exists(self):

evaluator.save()

-:return: bool or None (if not in check mode)
+This logic is for the server; it will not break the loop if you evaluate locally.
+
+:return: bool or None (if not on server)
"""

if not is_server(): # we only check the cache on the server
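A minimal sketch of the check-then-save pattern this docstring describes; `evaluator` and `first_batch_outputs` are illustrative assumptions, not part of this commit:

```python
# Sketch only: `evaluator` is any sotabencheval evaluator exposing .add(),
# .cache_exists and .save(); `first_batch_outputs` stands in for real results.
evaluator.add(first_batch_outputs)  # cache_exists requires at least one processed batch

if evaluator.cache_exists:          # only ever truthy on the sotabench server
    evaluator.save()                # reuses the cached results; remaining batches can be skipped
```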
@@ -225,7 +227,6 @@ def save(self, **kwargs):

:return: BenchmarkResult object with results and metadata
"""
-
# recalculate to ensure no mistakes made during batch-by-batch metric calculation
self.get_results()

sotabencheval/image_classification/imagenet.py

Lines changed: 35 additions & 14 deletions
@@ -1,4 +1,5 @@
# Some of the processing logic here is based on the torchvision ImageNet dataset
+# https://github.com/pytorch/vision/blob/master/torchvision/datasets/imagenet.py

import numpy as np
import os
@@ -12,6 +13,7 @@
from sotabencheval.utils import get_max_memory_allocated
from sotabencheval.image_classification.utils import top_k_accuracy_score

+
ARCHIVE_DICT = {
'labels': {
'url': 'https://github.com/paperswithcode/sotabench-eval/releases/download/0.01/imagenet_val_targets.pkl',
@@ -37,6 +39,7 @@ class ImageNetEvaluator(object):
from torch.utils.data import DataLoader

from sotabencheval.image_classification import ImageNetEvaluator
+from sotabencheval.utils import is_server

if is_server():
DATA_ROOT = './.data/vision/imagenet'
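The example in the class docstring switches on `is_server()`; a minimal sketch of the full switch, where the local path is an assumption for illustration:

```python
from sotabencheval.utils import is_server

# On sotabench.com the data lives in a fixed location; locally, point the
# evaluator at your own copy (the local path below is an assumption).
if is_server():
    DATA_ROOT = './.data/vision/imagenet'
else:
    DATA_ROOT = '/home/user/data/imagenet'
```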
@@ -107,11 +110,12 @@ def __init__(self,
paper_pwc_id: str = None,
paper_results: dict = None,
model_description=None,):
-"""Benchmarking function.
+"""Initializes an ImageNet Evaluator object

Args:
root (string): Root directory of the ImageNet Dataset - where the
-label data is located (or will be downloaded to).
+label data is located (or will be downloaded to). Note this does not download
+the full ImageNet dataset (!) but just annotation information.
model_name (str, optional): The name of the model from the
paper - if you want to link your build to a model from a
machine learning paper. See the ImageNet benchmark page for model names,
@@ -135,11 +139,12 @@ def __init__(self,
'Top 5 Accuracy'.
model_description (str, optional): Optional model description.
"""
-
root = self.root = os.path.expanduser(change_root_if_server(
root=root,
server_root="./.data/vision/imagenet"))

+# Model metadata
+
self.model_name = model_name
self.paper_arxiv_id = paper_arxiv_id
self.paper_pwc_id = paper_pwc_id
@@ -148,15 +153,19 @@ def __init__(self,

self.top1 = AverageMeter()
self.top5 = AverageMeter()
-
self.load_targets()

self.outputs = {}
self.results = None
+
+# Backend variables for hashing and caching
+
self.first_batch_processed = False
self.batch_hash = None
self.cached_results = False

+# Speed and memory metrics
+
self.speed_mem_metrics = {}
self.init_time = time.time()

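A minimal sketch of constructing the evaluator these `__init__` docstring changes document; every value below (model name, arXiv ID, metric numbers) is a placeholder rather than a claim about any paper:

```python
from sotabencheval.image_classification import ImageNetEvaluator

evaluator = ImageNetEvaluator(
    root='./.data/vision/imagenet',        # only label/annotation data is downloaded here
    model_name='My Model',                 # placeholder
    paper_arxiv_id='0000.00000',           # placeholder
    paper_results={'Top 1 Accuracy': 0.0,  # placeholder values; the keys follow the
                   'Top 5 Accuracy': 0.0}, # metric names named in the docstring
    model_description='Optional free-text description of the submission.',
)
```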
@@ -167,9 +176,13 @@ def cache_exists(self):
then sets self.results to cached results and returns True.

You can use this property for control flow to break a for loop over a dataset
-after the first iteration. This prevents rerunning the same calculation for the
+after the first iteration. This prevents re-running the same calculation for the
same model twice.

+Q: Why should the user use this?
+A: If you want fast "continuous evaluation" and don't want to avoid rerunning the same model over and over
+each time you commit something new to your repository.
+
Examples:
Breaking a for loop for a PyTorch evaluation

@@ -192,9 +205,10 @@ def cache_exists(self):

evaluator.save() # uses the cached results

-:return:
-"""
+This logic is for the server; it will not break the loop if you evaluate locally.

+:return: bool or None (if not in check mode)
+"""
if not self.first_batch_processed:
raise ValueError('No batches of data have been processed so no batch_hash exists')

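A minimal sketch of the loop-breaking pattern the docstring's PyTorch example refers to; `model`, `data_loader`, and the way validation image IDs are recovered are assumptions standing in for a real evaluation setup:

```python
# Sketch only: assumes a PyTorch model in eval mode and a DataLoader over the
# ImageNet validation set, with the validation image IDs known in loader order.
image_ids = [...]  # ImageNet validation IDs in loader order; deliberately elided

for i, (input, target) in enumerate(data_loader):
    output = model(input)
    batch_ids = image_ids[i * data_loader.batch_size:(i + 1) * data_loader.batch_size]
    evaluator.add(dict(zip(batch_ids, output.detach().cpu().numpy())))
    if evaluator.cache_exists:   # only ever truthy on the sotabench server
        break                    # cached results will be reused by save()

evaluator.save()
```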
@@ -217,7 +231,8 @@ def cache_exists(self):

def load_targets(self):
"""
-Downloads ImageNet labels and IDs and puts into self.root, then loads at self.targets
+Downloads ImageNet labels and IDs and puts into self.root, then loads to self.targets
+
:return: void - update self.targets with the ImageNet validation data labels, and downloads if
the pickled validation data is not in the root location
"""
@@ -233,7 +248,7 @@ def add(self, output_dict: dict):
"""
Updates the evaluator with new results

-:param output_dict (dict): Where keys are image IDs, and each value should be an 1D np.ndarray of size 1000
+:param output_dict: (dict) Where keys are image IDs, and each value should be an 1D np.ndarray of size 1000
containing logits for that image ID.
:return: void - updates self.outputs with the new IDSs and prediction

@@ -245,7 +260,6 @@ def add(self, output_dict: dict):
my_evaluator.add({'ILSVRC2012_val_00000293': np.array([1.04243, ...]),
'ILSVRC2012_val_00000294': np.array([-2.3677, ...])})
"""
-
if not output_dict:
print('Empty output_dict; will not process')
return
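A small illustration of the input format `add` expects, using the evaluator from the construction sketch above and a random vector as a stand-in for real model logits:

```python
import numpy as np

logits = np.random.randn(1000)                      # stand-in for a model's 1000-class logits
evaluator.add({'ILSVRC2012_val_00000001': logits})  # one entry per ImageNet validation image ID
```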
@@ -312,21 +326,28 @@ def get_results(self):
return self.results

def reset_time(self):
+"""
+Simple method to reset the timer self.init_time. Often used before a loop, to time the evaluation
+appropriately, for example:
+
+:return: void - resets self.init_time
+"""
self.init_time = time.time()

def save(self):
"""
-Calculate results and then puts into a BenchmarkResult object
+Calculate results and then put into a BenchmarkResult object

-On the sotabench.com server, this will produce a JSON file serialisation and results will be recorded
-on the platform.
+On the sotabench.com server, this will produce a JSON file serialisation in sotabench_results.json and results
+will be recorded on the platform.

:return: BenchmarkResult object with results and metadata
"""
-
# recalculate to ensure no mistakes made during batch-by-batch metric calculation
self.get_results()

+# If this is the first time the model is run, then we record evaluation time information
+
if not self.cached_results:
exec_speed = (time.time() - self.init_time)
self.speed_mem_metrics['Tasks / Evaluation Time'] = len(self.outputs) / exec_speed
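The new `reset_time` docstring stops short of its example; a minimal sketch of the usage it appears to intend, with the loop and helper names as assumptions:

```python
evaluator.reset_time()               # start timing immediately before the evaluation loop
for batch in data_loader:            # hypothetical loop over the validation data
    evaluator.add(evaluate(batch))   # hypothetical helper producing the output dict
evaluator.save()                     # evaluation time is measured from reset_time()
```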

sotabencheval/image_classification/utils.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ def top_k_accuracy_score(y_true, y_pred, k=5, normalize=True):
"""

if len(y_true.shape) == 2:
-y_true = y_true[0] # should be one-dimensional
+y_true = y_true[0]  # should be one-dimensional

num_obs, num_labels = y_pred.shape

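A small usage example for the function touched above, assuming the conventional top-k semantics (the fraction of rows whose true label appears among the k highest-scoring classes):

```python
import numpy as np
from sotabencheval.image_classification.utils import top_k_accuracy_score

y_true = np.array([2, 0])                      # true class indices, one per observation
y_pred = np.array([[0.1, 0.2, 0.7, 0.0, 0.0],  # per-class scores, shape (num_obs, num_labels)
                   [0.5, 0.3, 0.1, 0.1, 0.0]])

print(top_k_accuracy_score(y_true, y_pred, k=2))  # 1.0 here: each true class is in its row's top 2
```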
sotabencheval/object_detection/coco.py

Lines changed: 23 additions & 8 deletions
@@ -1,4 +1,5 @@
# Some of the processing logic here is based on the torchvision COCO dataset
+# https://github.com/pytorch/vision/blob/master/torchvision/datasets/coco.py

import copy
import numpy as np
@@ -50,7 +51,7 @@ def __init__(self,
paper_pwc_id: str = None,
paper_results: dict = None,
model_description=None,):
-"""Benchmarking function.
+"""Initializes a COCO Evaluator object

Args:
root (string): Root directory of the COCO Dataset - where the
@@ -80,10 +81,11 @@ def __init__(self,
'AP75', 'APS', 'APM', 'APL'
model_description (str, optional): Optional model description.
"""
-
root = self.root = change_root_if_server(root=root,
server_root="./.data/vision/coco")

+# Model metadata
+
self.model_name = model_name
self.paper_arxiv_id = paper_arxiv_id
self.paper_pwc_id = paper_pwc_id
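A minimal sketch of constructing the evaluator this `__init__` documents, assuming the class is exported as `COCOEvaluator` from `sotabencheval.object_detection`; the metric keys follow the Args text above and all values are placeholders:

```python
from sotabencheval.object_detection import COCOEvaluator

evaluator = COCOEvaluator(
    root='./.data/vision/coco',
    model_name='My Detector',                # placeholder
    paper_arxiv_id='0000.00000',             # placeholder
    paper_results={'AP75': 0.0, 'APS': 0.0,  # placeholder values; keys follow the
                   'APM': 0.0, 'APL': 0.0},  # metric names listed in the docstring
)
```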
@@ -103,15 +105,25 @@ def __init__(self,

self.detections = []
self.results = None
+
+# Backend variables for hashing and caching
+
self.first_batch_processed = False
self.batch_hash = None
self.cached_results = False

-self.speed_mem_metrics = {}
+# Speed and memory metrics

+self.speed_mem_metrics = {}
self.init_time = time.time()

def _download(self, annFile):
+"""
+Utility function for downloading the COCO annotation file
+
+:param annFile: path of the annotations file
+:return: void - extracts the archive
+"""
if not os.path.isdir(annFile):
if "2017" in annFile:
annotations_dir_zip = os.path.join(
@@ -159,7 +171,6 @@ def cache_exists(self):

:return: bool or None (if not in check mode)
"""
-
if not self.first_batch_processed:
raise ValueError('No batches of data have been processed so no batch_hash exists')

@@ -207,7 +218,6 @@ def cache_values(self, annotations, metrics):
:param metrics: dictionary of final AP metrics
:return: list of data (combining annotations and metrics)
"""
-
metrics = {k: np.round(v, 3) for k, v in metrics.items()}
new_annotations = copy.deepcopy(annotations)
new_annotations = [self.cache_format_ann(ann) for ann in new_annotations]
@@ -236,7 +246,6 @@ def add(self, detections: list):
my_evaluator.add([{'image_id': 397133, 'bbox': [386.1628112792969, 69.48855590820312,
110.14895629882812, 278.2847595214844], 'score': 0.999152421951294, 'category_id': 1}])
"""
-
self.detections.extend(detections)

self.coco_evaluator.update(detections)
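A sketch of feeding a detection model's outputs into `add`, following the format shown in the docstring example; `run_inference` and the values it yields are assumptions for illustration:

```python
# Sketch only: run_inference is a hypothetical helper yielding, per image, the COCO
# image_id plus predicted boxes as [x, y, width, height], confidence scores and category IDs.
for image_id, boxes, scores, labels in run_inference(data_loader):
    evaluator.add([
        {'image_id': image_id,
         'bbox': list(box),
         'score': float(score),
         'category_id': int(label)}
        for box, score, label in zip(boxes, scores, labels)
    ])
    if evaluator.cache_exists:   # server-side shortcut, as with the other evaluators
        break

evaluator.save()
```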
@@ -256,7 +265,6 @@ def get_results(self):

:return: dict with COCO AP metrics
"""
-
if self.cached_results:
return self.results

@@ -272,6 +280,12 @@ def get_results(self):
return self.results

def reset_time(self):
+"""
+Simple method to reset the timer self.init_time. Often used before a loop, to time the evaluation
+appropriately, for example:
+
+:return: void - resets self.init_time
+"""
self.init_time = time.time()

def save(self):
@@ -283,10 +297,11 @@ def save(self):

:return: BenchmarkResult object with results and metadata
"""
-
# recalculate to ensure no mistakes made during batch-by-batch metric calculation
self.get_results()

+# If this is the first time the model is run, then we record evaluation time information
+
if not self.cached_results:
unique_image_ids = set([d['image_id'] for d in self.detections])
exec_speed = (time.time() - self.init_time)
