update comments for datasets, options, and utils

junyanz · junyanz · commit 2215995e70b1 · 2019-01-04T14:34:56.000-05:00
diff --git a/data/__init__.py b/data/__init__.py
@@ -7,15 +7,15 @@
     -- <__getitem__>　(get a data point)
     -- (optionally) <modify_commandline_options> (add dataset-specific options and set default options).
 Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
-See our template dataset class 'template_dataset.py' for an example.
+See our template dataset class 'template_dataset.py' for more details.
 """
 import importlib
 import torch.utils.data
 from data.base_dataset import BaseDataset
 
 
 def find_dataset_using_name(dataset_name):
-    """Import the module "data/[datasetname]_dataset.py" given the option --dataset_mode [datasetname].
+    """Import the module "data/[datasetname]_dataset.py" given the option '--dataset_mode [datasetname]'.
 
     In the file, the class called DatasetNameDataset() will
     be instantiated. It has to be a subclass of BaseDataset,
@@ -64,8 +64,8 @@ class CustomDatasetDataLoader():
     def __init__(self, opt):
         """Initialize this class
 
-        It first create a dataset instance given the name [dataset_mode]
-        It then create a multi-threaded data loader.
+        Step 1: create a dataset instance given the name [dataset_mode]
+        Step 2: create a multi-threaded data loader.
         """
         self.opt = opt
         dataset_class = find_dataset_using_name(opt.dataset_mode)
diff --git a/data/aligned_dataset.py b/data/aligned_dataset.py
@@ -17,7 +17,7 @@ def __init__(self, opt):
         """Initialize this dataset class.
 
         Parameters:
-            opt -- stores all the experiment flags; needs to be a subclass of BaseOptions
+            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
         """
         BaseDataset.__init__(self, opt)
         self.dir_AB = os.path.join(opt.dataroot, opt.phase)  # get the image directory
@@ -37,10 +37,10 @@ def __getitem__(self, index):
             index - - a random integer for data indexing
 
         Returns a dictionary that contains A, B, A_paths and B_paths
-            A(tensor) - - an image in the input domain
-            B(tensor) - - its corresponding image in the target domain
-            A_paths(str) - - image paths
-            B_paths(str) - - image paths
+            A (tensor) - - an image in the input domain
+            B (tensor) - - its corresponding image in the target domain
+            A_paths (str) - - image paths
+            B_paths (str) - - image paths (same as A_paths)
         """
         # read a image given a random integer index
         AB_path = self.AB_paths[index]
diff --git a/data/base_dataset.py b/data/base_dataset.py
@@ -61,8 +61,15 @@ def __getitem__(self, index):
 def get_transform(opt, grayscale=False, convert=True, crop=True, flip=True):
     """Create a torchvision transformation function
 
-    The type of transformation is defined by option(e.g., [preprocess], [load_size], [crop_size])
+    The type of transformation is defined by option (e.g., [opt.preprocess], [opt.load_size], [opt.crop_size])
     and can be overwritten by arguments such as [convert], [crop], and [flip]
+
+    Parameters:
+        opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
+        grayscale (bool)   -- if convert input RGB image to a grayscale image
+        convert (bool)     -- if convert an image to a tensor array between [-1, 1]
+        crop    (bool)     -- if apply cropping
+        flip    (bool)     -- if apply horizontal flipping
     """
     transform_list = []
     if grayscale:
@@ -94,7 +101,12 @@ def get_transform(opt, grayscale=False, convert=True, crop=True, flip=True):
 
 
 def __adjust(img):
-    """Modify the width and height to be multiple of 4
+    """Modify the width and height to be multiple of 4.
+
+    Parameters:
+        img (PIL image) -- input image
+
+    Returns a modified image whose width and height are multiples of 4.
 
     the size needs to be a multiple of 4,
     because going through generator network may change img size
@@ -118,6 +130,12 @@ def __adjust(img):
 def __scale_width(img, target_width):
     """Resize images so that the width of the output image is the same as a target width
 
+    Parameters:
+        img (PIL image)    -- input image
+        target_width (int) -- target image width
+
+    Returns a modified image whose width matches the target image width.
+
     the size needs to be a multiple of 4,
     because going through generator network may change img size
     and eventually cause size mismatch error
diff --git a/data/colorization_dataset.py b/data/colorization_dataset.py
@@ -8,24 +8,45 @@
 
 
 class ColorizationDataset(BaseDataset):
-    """This dataset class can load a set of nature images in RGB, and convert RGB format into (L, ab) pairs in Lab color space.
+    """This dataset class can load a set of natural images in RGB, and convert RGB format into (L, ab) pairs in Lab color space.
 
     This dataset is required by pix2pix-based colorization model ('--model colorization')
     """
     @staticmethod
     def modify_commandline_options(parser, is_train):
+        """Add new dataset-specific options, and rewrite default values for existing options.
+
+        By default, the number of channels for input image is 1 (L) and
+        the number of channels for output image is 2 (ab). The direction is from A to B.
+        """
         parser.set_defaults(input_nc=1, output_nc=2, direction='AtoB')
         return parser
 
     def __init__(self, opt):
+        """Initialize this dataset class.
+
+        Parameters:
+            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
+        """
         BaseDataset.__init__(self, opt)
-        self.dir_A = os.path.join(opt.dataroot)
-        self.A_paths = sorted(make_dataset(self.dir_A, opt.max_dataset_size))
+        self.dir = os.path.join(opt.dataroot)
+        self.AB_paths = sorted(make_dataset(self.dir, opt.max_dataset_size))
         assert(opt.input_nc == 1 and opt.output_nc == 2 and opt.direction == 'AtoB')
         self.transform = get_transform(opt, convert=False)
 
     def __getitem__(self, index):
-        path = self.A_paths[index]
+        """Return a data point and its metadata information.
+
+        Parameters:
+            index - - a random integer for data indexing
+
+        Returns a dictionary that contains A, B, A_paths and B_paths
+            A (tensor) - - the L channel of an image
+            B (tensor) - - the ab channels of the same image
+            A_paths (str) - - image paths
+            B_paths (str) - - image paths (same as A_paths)
+        """
+        path = self.AB_paths[index]
         im = Image.open(path).convert('RGB')
         im = self.transform(im)
         im = np.array(im)
@@ -36,4 +57,5 @@ def __getitem__(self, index):
         return {'A': A, 'B': B, 'A_paths': path, 'B_paths': path}
 
     def __len__(self):
-        return len(self.A_paths)
+        """Return the total number of images in the dataset."""
+        return len(self.AB_paths)
diff --git a/data/template_dataset.py b/data/template_dataset.py
@@ -1,6 +1,6 @@
 """Dataset class template
 
-This module provides a templete for users to implement custom datasets.
+This module provides a template for users to implement custom datasets.
 You can specify '--dataset_mode template' to use this dataset.
 The class name should be consistent with both the filename and its dataset_mode option.
 The filename should be <dataset_mode>_dataset.py
@@ -47,7 +47,7 @@ def __init__(self, opt):
         # save the option and dataset root
         BaseDataset.__init__(self, opt)
         # get the image paths of your dataset;
-        self.image_paths = []  # You can call <sorted(make_dataset(self.root, opt.max_dataset_size))> to get all the image paths under the directory self.root
+        self.image_paths = []  # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root
         # define the default transform function. You can use <base_dataset.get_transform>; You can also define your custom transform function
         self.transform = get_transform(opt)
 
@@ -58,11 +58,11 @@ def __getitem__(self, index):
             index -- a random integer for data indexing
 
         Returns:
-            a dictionary of data with their names. It ususally contains the data itself and its metadata information.
+            a dictionary of data with their names. It usually contains the data itself and its metadata information.
 
         Step 1: get a random image path: e.g., path = self.image_paths[index]
         Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB').
-        Step 3: convert your data to a PyTorch tensor. You can use function such as self.transform. e.g., data = self.transform(image)
+        Step 3: convert your data to a PyTorch tensor. You can use helper functions such as self.transform. e.g., data = self.transform(image)
         Step 4: return a data point as a dictionary.
         """
         path = 'temp'    # needs to be a string
diff --git a/docs/overview.md b/docs/overview.md
@@ -39,7 +39,7 @@ To help users better understand and use our codebase, we briefly overview the fu
 [util](../util) directory includes a miscellaneous collection of useful helper functions.
   * [\_\_init\_\_.py](../util/__init__.py) is required to make Python treat the directory `util` as containing packages,
   * [get_data.py](../util/get_data.py) provides a Python script for downloading CycleGAN and pix2pix datasets.  Alternatively, You can also use bash scripts such as [download_pix2pix_model.sh](../scripts/download_pix2pix_model.sh) and [download_cyclegan_model.sh](../scripts/download_cyclegan_model.sh).
-  * [html.py](../util/html.py) implements a module that saves images into a single HTML file.  It consists of functions such as `add_header` (add a text header to the HTML file), `add_images` (add a row of images to the HTML file), `save` (save the HTML to the disk). It is based on Python library `dominate`, a Python library for creating and manipulating HTML documents using an elegant DOM API.
+  * [html.py](../util/html.py) implements a module that saves images into a single HTML file.  It consists of functions such as `add_header` (add a text header to the HTML file), `add_images` (add a row of images to the HTML file), `save` (save the HTML to the disk). It is based on Python library `dominate`, a Python library for creating and manipulating HTML documents using a DOM API.
   * [image_pool.py](../util/image_pool.py) implements an image buffer that stores previously generated images. This buffer enables us to update discriminators using a history of generated images rather than the ones produced by the latest generators. The original idea was discussed in this [paper](http://openaccess.thecvf.com/content_cvpr_2017/papers/Shrivastava_Learning_From_Simulated_CVPR_2017_paper.pdf). The size of the buffer is controlled by the flag `--pool_size`.
   * [visualizer.py](../util/visualizer.py) includes several functions that can display/save images and print/save logging information. It uses a Python library `visdom` for display and a Python library `dominate` (wrapped in `HTML`) for creating HTML files with images.
   * [util.py](../util/util.py) consists of simple helper functions such as `tensor2im` (convert a tensor array to a numpy image array), `diagnose_network` (calculate and print the mean of average absolute value of gradients), and `mkdirs` (create multiple directories).
diff --git a/options/base_options.py b/options/base_options.py
@@ -7,7 +7,7 @@
 
 
 class BaseOptions():
-    """This class defines options that are used during both training and test time.
+    """This class defines options used during both training and test time.
 
     It also implements several helper functions such as parsing, printing, and saving the options.
     It also gathers additional options defined in <modify_commandline_options> functions in both dataset class and model class.
@@ -59,11 +59,11 @@ def initialize(self, parser):
 
     def gather_options(self):
         """Initialize our parser with basic options(only once).
-        Add additional model - specific and dataset - specific options.
-        These options are difined in the < modify_commandline_options > function
+        Add additional model-specific and dataset-specific options.
+        These options are defined in the <modify_commandline_options> function
         in model and dataset classes.
         """
-        if not self.initialized:  # check if it has been initalized
+        if not self.initialized:  # check if it has been initialized
             parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
             parser = self.initialize(parser)
 
diff --git a/options/test_options.py b/options/test_options.py
@@ -2,7 +2,7 @@
 
 
 class TestOptions(BaseOptions):
-    """This class includes options that are only used during test time.
+    """This class includes test options.
 
     It also includes shared options defined in BaseOptions.
     """
diff --git a/options/train_options.py b/options/train_options.py
@@ -2,7 +2,7 @@
 
 
 class TrainOptions(BaseOptions):
-    """This class includes options that are only used during training time.
+    """This class includes training options.
 
     It also includes shared options defined in BaseOptions.
     """
diff --git a/util/get_data.py b/util/get_data.py
@@ -81,18 +81,15 @@ def get(self, save_path, dataset=None):
 
         Download a dataset.
 
-        Args:
-            save_path : str
-                A directory to save the data to.
-            dataset : str, optional
-                A specific dataset to download.
-                Note: this must include the file extension.
-                If None, options will be presented for you
-                to choose from.
+        Parameters:
+            save_path (str) -- A directory to save the data to.
+            dataset (str)   -- (optional). A specific dataset to download.
+                            Note: this must include the file extension.
+                            If None, options will be presented for you
+                            to choose from.
 
         Returns:
-            save_path_full : str
-                The absolute path to the downloaded data.
+            save_path_full (str) -- the absolute path to the downloaded data.
 
         """
         if dataset is None:
diff --git a/util/html.py b/util/html.py
@@ -8,7 +8,7 @@ class HTML:
 
      It consists of functions such as <add_header> (add a text header to the HTML file),
      <add_images> (add a row of images to the HTML file), and <save> (save the HTML to the disk).
-     It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using an DOM API.
+     It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API.
     """
 
     def __init__(self, web_dir, title, refresh=0):
diff --git a/util/image_pool.py b/util/image_pool.py
@@ -28,7 +28,7 @@ def query(self, images):
 
         Returns images from the buffer.
 
-        By 50/100, the buffer will just return the input images.
+        By 50/100, the buffer will return input images.
         By 50/100, the buffer will return images previously stored in the buffer,
         and insert the current images to the buffer.
         """
diff --git a/util/util.py b/util/util.py
@@ -74,7 +74,11 @@ def print_numpy(x, val=True, shp=False):
 
 
 def mkdirs(paths):
-    """create empty directories if they don't exist"""
+    """create empty directories if they don't exist
+
+    Parameters:
+        paths (str list) -- a list of directory paths
+    """
     if isinstance(paths, list) and not isinstance(paths, str):
         for path in paths:
             mkdir(path)
@@ -83,6 +87,10 @@ def mkdirs(paths):
 
 
 def mkdir(path):
-    """create a single empty directory if it didn't exist"""
+    """create a single empty directory if it didn't exist
+
+    Parameters:
+        path (str) -- a single directory path
+    """
     if not os.path.exists(path):
         os.makedirs(path)
diff --git a/util/visualizer.py b/util/visualizer.py