renaming: resize_or_crop -> preprocess

junyanz · junyanz · commit c047c55b212c · 2019-01-03T15:08:30.000-05:00
diff --git a/data/__init__.py b/data/__init__.py
@@ -1,4 +1,4 @@
-"""This package includes all the modules related to data loading and Preprocessing
+"""This package includes all the modules related to data loading and preprocessing
 
  To add a custom dataset class called dummy, you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
  You need to implement four functions:
@@ -15,7 +15,7 @@
 
 
 def find_dataset_using_name(dataset_name):
-    """Import the module "data/datasetname_dataset.py" given the option --dataset_mode [datasetname].
+    """Import the module "data/[datasetname]_dataset.py" given the option --dataset_mode [datasetname].
 
     In the file, the class called DatasetNameDataset() will
     be instantiated. It has to be a subclass of BaseDataset,
@@ -32,23 +32,26 @@ def find_dataset_using_name(dataset_name):
             dataset = cls
 
     if dataset is None:
-        print("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
-        exit(0)
+        raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
 
     return dataset
 
 
 def get_option_setter(dataset_name):
-    """Return the modify_commandline_options of the dataset class."""
+    """Return the static method <modify_commandline_options> of the dataset class."""
     dataset_class = find_dataset_using_name(dataset_name)
     return dataset_class.modify_commandline_options
 
 
 def create_dataset(opt):
-    """Create dataloader given the option.
+    """Create dataset given the option.
 
     This function warps the class CustomDatasetDataLoader.
     This is the main interface called by train.py and test.py.
+
+    Example:
+    from data import create_dataset
+    dataset = create_dataset(opt)
     """
     data_loader = CustomDatasetDataLoader(opt)
     dataset = data_loader.load_data()
@@ -58,10 +61,12 @@ def create_dataset(opt):
 class CustomDatasetDataLoader():
     """Wrapper class of Dataset class that performs multi-threaded data loading"""
 
-    def name(self):
-        return 'CustomDatasetDataLoader'
-
     def __init__(self, opt):
+        """Initialize this class
+
+        It first creates a dataset instance given the name [dataset_mode].
+        It then creates a multi-threaded data loader.
+        """
         self.opt = opt
         dataset_class = find_dataset_using_name(opt.dataset_mode)
         self.dataset = dataset_class(opt)
@@ -76,9 +81,11 @@ def load_data(self):
         return self
 
     def __len__(self):
+        """Return the number of data in the dataset"""
         return min(len(self.dataset), self.opt.max_dataset_size)
 
     def __iter__(self):
+        """Return a batch of data"""
         for i, data in enumerate(self.dataloader):
             if i * self.opt.batch_size >= self.opt.max_dataset_size:
                 break
diff --git a/data/aligned_dataset.py b/data/aligned_dataset.py
@@ -47,7 +47,7 @@ def __getitem__(self, index):
         A = AB.crop((0, 0, w2, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
         B = AB.crop((w2, 0, w, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
         # apply the same cropping to both A and B
-        if 'crop' in self.opt.resize_or_crop:
+        if 'crop' in self.opt.preprocess:
             x, y, h, w = transforms.RandomCrop.get_params(A, output_size=[self.opt.crop_size, self.opt.crop_size])
             A = A.crop((x, y, w, h))
             B = B.crop((x, y, w, h))
@@ -62,5 +62,5 @@ def __getitem__(self, index):
         return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
 
     def __len__(self):
-        """Return the total number of images."""
+        """Return the total number of images in the dataset."""
         return len(self.AB_paths)
diff --git a/data/base_dataset.py b/data/base_dataset.py
@@ -15,6 +15,7 @@ def modify_commandline_options(parser, is_train):
 
     @abstractmethod
     def __len__(self):
+        """Return the total number of images in the dataset."""
         return 0
 
     @abstractmethod
@@ -23,25 +24,30 @@ def __getitem__(self, index):
 
 
 def get_transform(opt, grayscale=False, convert=True, crop=True, flip=True):
+    """Create a torchvision transformation function
+
+    The type of transformation is defined by options (e.g., [preprocess], [load_size], [crop_size])
+    and can be overridden by arguments such as [convert], [crop], and [flip].
+    """
     transform_list = []
     if grayscale:
         transform_list.append(transforms.Grayscale(1))
-    if opt.resize_or_crop == 'resize_and_crop':
+    if opt.preprocess == 'resize_and_crop':
         osize = [opt.load_size, opt.load_size]
         transform_list.append(transforms.Resize(osize, Image.BICUBIC))
         transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.resize_or_crop == 'crop' and crop:
+    elif opt.preprocess == 'crop' and crop:
         transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.resize_or_crop == 'scale_width':
+    elif opt.preprocess == 'scale_width':
         transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.crop_size)))
-    elif opt.resize_or_crop == 'scale_width_and_crop':
+    elif opt.preprocess == 'scale_width_and_crop':
         transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size)))
         if crop:
             transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.resize_or_crop == 'none':
+    elif opt.preprocess == 'none':
         transform_list.append(transforms.Lambda(lambda img: __adjust(img)))
     else:
-        raise ValueError('--resize_or_crop %s is not a valid option.' % opt.resize_or_crop)
+        raise ValueError('--preprocess %s is not a valid option.' % opt.preprocess)
 
     if not opt.no_flip and flip:
         transform_list.append(transforms.RandomHorizontalFlip())
@@ -75,7 +81,7 @@ def __adjust(img):
 
 
 def __scale_width(img, target_width):
-    """Resize images so that the output image width is the same as target width
+    """Resize images so that the width of the output image is the same as a target width
 
     the size needs to be a multiple of 4,
     because going through generator network may change img size
diff --git a/docs/qa.md b/docs/qa.md
@@ -32,7 +32,7 @@ The current code only works with PyTorch 0.4+. An earlier PyTorch version can of
 #### ValueError: empty range for randrange() ([#390](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/390), [#376](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/376), [#194](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/194))
 Similar error messages include "ConnectionRefusedError: [Errno 111] Connection refused"
 
-It is related to data augmentation step. It often happens when you use `--resize_or_crop crop`. The program will crop random `crop_size x crop_size` patches out of the input training images. But if some of your image sizes (e.g., `256x384`) are smaller than the `crop_size` (e.g., 512), you will get this error. A simple fix will be to use other data augmentation methods such as `--resize_and_crop` or `scale_width_and_crop`.  Our program will automatically resize the images according to `load_size` before apply `crop_size x crop_size` cropping. Make sure that `load_size >= crop_size`.
+It is related to the data augmentation step. It often happens when you use `--preprocess crop`. The program will crop random `crop_size x crop_size` patches out of the input training images. But if some of your image sizes (e.g., `256x384`) are smaller than the `crop_size` (e.g., 512), you will get this error. A simple fix will be to use other data augmentation methods such as `--preprocess resize_and_crop` or `--preprocess scale_width_and_crop`.  Our program will automatically resize the images according to `load_size` before applying `crop_size x crop_size` cropping. Make sure that `load_size >= crop_size`.
 
 
 #### Can I continue/resume my training? ([#350](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/350), [#275](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/275), [#234](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/234), [#87](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/87))
@@ -65,9 +65,9 @@ Yes, you can download pretrained models with the bash script `./scripts/download
 #### Out of memory ([#174](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/174))
 CycleGAN is more memory-intensive than pix2pix as it requires two generators and two discriminators. If you would like to produce high-resolution images, you can do the following.
 
-- During training, train CycleGAN on cropped images of the training set. Please be careful not to change the aspect ratio or the scale of the original image, as this can lead to the training/test gap. You can usually do this by using `--resize_or_crop crop` option, or `--resize_or_crop scale_width_and_crop`.
+- During training, train CycleGAN on cropped images of the training set. Please be careful not to change the aspect ratio or the scale of the original image, as this can lead to the training/test gap. You can usually do this by using `--preprocess crop` option, or `--preprocess scale_width_and_crop`.
 
-- Then at test time, you can load only one generator to produce the results in a single direction. This greatly saves GPU memory as you are not loading the discriminators and the other generator in the opposite direction. You can probably take the whole image as input. You can do this using `--model test --dataroot [path to the directory that contains your test images (e.g., ./datasets/horse2zebra/trainA)] --model_suffix _A --resize_or_crop none`. You can use either `--resize_or_crop none` or `--resize_or_crop scale_width --crop_size [your_desired_image_width]`. Please see the [model_suffix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/test_model.py#L16) and [resize_or_crop](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/data/base_dataset.py#L24) for more details.
+- Then at test time, you can load only one generator to produce the results in a single direction. This greatly saves GPU memory as you are not loading the discriminators and the other generator in the opposite direction. You can probably take the whole image as input. You can do this using `--model test --dataroot [path to the directory that contains your test images (e.g., ./datasets/horse2zebra/trainA)] --model_suffix _A --preprocess none`. You can use either `--preprocess none` or `--preprocess scale_width --crop_size [your_desired_image_width]`. Please see the [model_suffix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/test_model.py#L16) and [preprocess](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/data/base_dataset.py#L24) for more details.
 
 #### What is the identity loss? ([#322](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/322), [#373](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/373), [#362](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/pull/362))
 We use the identity loss for our photo to painting application. The identity loss can regularize the generator to be close to an identity mapping when fed with real samples from the *target* domain. If something already looks like from the target domain, you should preserve the image without making additional changes. The generator trained with this loss will often be more conservative for unknown content. Please see more details in Sec 5.2 ''Photo generation from paintings'' and  Figure 12 in the CycleGAN [paper](https://arxiv.org/pdf/1703.10593.pdf). The loss was first proposed in the Equation 6 of the prior work [[Taigman et al., 2017]](https://arxiv.org/pdf/1611.02200.pdf).
diff --git a/docs/tips.md b/docs/tips.md
@@ -8,7 +8,7 @@ Please set`--gpu_ids -1` to use CPU mode; set `--gpu_ids 0,1,2` for multi-GPU mo
 During training, the current results can be viewed using two methods. First, if you set `--display_id` > 0, the results and loss plot will appear on a local graphics web server launched by [visdom](https://github.com/facebookresearch/visdom). To do this, you should have `visdom` installed and a server running by the command `python -m visdom.server`. The default server URL is `http://localhost:8097`. `display_id` corresponds to the window ID that is displayed on the `visdom` server. The `visdom` display functionality is turned on by default. To avoid the extra overhead of communicating with `visdom` set `--display_id -1`. Second, the intermediate results are saved to `[opt.checkpoints_dir]/[opt.name]/web/` as an HTML file. To avoid this, set `--no_html`.
 
 #### Preprocessing
- Images can be resized and cropped in different ways using `--resize_or_crop` option. The default option `'resize_and_crop'` resizes the image to be of size `(opt.load_size, opt.load_size)` and does a random crop of size `(opt.crop_size, opt.crop_size)`. `'crop'` skips the resizing step and only performs random cropping. `'scale_width'` resizes the image to have width `opt.crop_size` while keeping the aspect ratio. `'scale_width_and_crop'` first resizes the image to have width `opt.load_size` and then does random cropping of size `(opt.crop_size, opt.crop_size)`. `'none'` tries to skip all these preprocessing steps. However, if the image size is not a multiple of some number depending on the number of downsamplings of the generator, you will get an error because the size of the output image may be different from the size of the input image. Therefore, `'none'` option still tries to adjust the image size to be a multiple of 4. You might need a bigger adjustment if you change the generator architecture. Please see `data/base_datset.py` do see how all these were implemented.
+ Images can be resized and cropped in different ways using `--preprocess` option. The default option `'resize_and_crop'` resizes the image to be of size `(opt.load_size, opt.load_size)` and does a random crop of size `(opt.crop_size, opt.crop_size)`. `'crop'` skips the resizing step and only performs random cropping. `'scale_width'` resizes the image to have width `opt.crop_size` while keeping the aspect ratio. `'scale_width_and_crop'` first resizes the image to have width `opt.load_size` and then does random cropping of size `(opt.crop_size, opt.crop_size)`. `'none'` tries to skip all these preprocessing steps. However, if the image size is not a multiple of some number depending on the number of downsamplings of the generator, you will get an error because the size of the output image may be different from the size of the input image. Therefore, `'none'` option still tries to adjust the image size to be a multiple of 4. You might need a bigger adjustment if you change the generator architecture. Please see `data/base_dataset.py` to see how all these were implemented.
 
 #### Fine-tuning/resume training
 To fine-tune a pre-trained model, or resume the previous training, use the `--continue_train` flag. The program will then load the model based on `epoch`. By default, the program will initialize the epoch count as 1. Set `--epoch_count <int>` to specify a different starting epoch count.
@@ -33,10 +33,10 @@ This will combine each pair of images (A,B) into a single image file, ready for
 
 
 #### About image size
- Since the generator architecture in CycleGAN involves a series of downsampling / upsampling operations, the size of the input and output image may not match if the input image size is not a multiple of 4. As a result, you may get a runtime error because the L1 identity loss cannot be enforced with images of different size. Therefore, we slightly resize the image to become multiples of 4 even with `--resize_or_crop none` option. For the same reason, `--crop_size` needs to be a multiple of 4.
+ Since the generator architecture in CycleGAN involves a series of downsampling / upsampling operations, the size of the input and output image may not match if the input image size is not a multiple of 4. As a result, you may get a runtime error because the L1 identity loss cannot be enforced with images of different size. Therefore, we slightly resize the image to become multiples of 4 even with `--preprocess none` option. For the same reason, `--crop_size` needs to be a multiple of 4.
 
 #### Training/Testing with high res images
-CycleGAN is quite memory-intensive as four networks (two generators and two discriminators) need to be loaded on one GPU, so a large image cannot be entirely loaded. In this case, we recommend training with cropped images. For example, to generate 1024px results, you can train with `--resize_or_crop scale_width_and_crop --load_size 1024 --crop_size 360`, and test with `--resize_or_crop scale_width --crop_size 1024`. This way makes sure the training and test will be at the same scale. At test time, you can afford higher resolution because you don’t need to load all networks.
+CycleGAN is quite memory-intensive as four networks (two generators and two discriminators) need to be loaded on one GPU, so a large image cannot be entirely loaded. In this case, we recommend training with cropped images. For example, to generate 1024px results, you can train with `--preprocess scale_width_and_crop --load_size 1024 --crop_size 360`, and test with `--preprocess scale_width --crop_size 1024`. This way makes sure the training and test will be at the same scale. At test time, you can afford higher resolution because you don’t need to load all networks.
 
 #### About loss curve
 Unfortunately, the loss curve does not reveal much information in training GANs, and CycleGAN is no exception. To check whether the training has converged or not, we recommend periodically generating a few samples and looking at them.
diff --git a/models/base_model.py b/models/base_model.py
@@ -16,7 +16,7 @@ def __init__(self, opt):
         self.isTrain = opt.isTrain
         self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')
         self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
-        if opt.resize_or_crop != 'scale_width':
+        if opt.preprocess != 'scale_width':  # with preprocessing option [scale_width], the input image might have different sizes, which will hurt the performance of cudnn.benchmark.
             torch.backends.cudnn.benchmark = True
         self.loss_names = []
         self.model_names = []
diff --git a/options/base_options.py b/options/base_options.py