Skip to content

Commit c047c55

Browse files
committed
renaming: resize_or_crop -> preprocess
1 parent 7eb2922 commit c047c55

File tree

7 files changed

+58
-41
lines changed

7 files changed

+58
-41
lines changed

data/__init__.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""This package includes all the modules related to data loading and Preprocessing
1+
"""This package includes all the modules related to data loading and preprocessing
22
33
To add a custom dataset class called dummy, you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
44
You need to implement four functions:
@@ -15,7 +15,7 @@
1515

1616

1717
def find_dataset_using_name(dataset_name):
18-
"""Import the module "data/datasetname_dataset.py" given the option --dataset_mode [datasetname].
18+
"""Import the module "data/[datasetname]_dataset.py" given the option --dataset_mode [datasetname].
1919
2020
In the file, the class called DatasetNameDataset() will
2121
be instantiated. It has to be a subclass of BaseDataset,
@@ -32,23 +32,26 @@ def find_dataset_using_name(dataset_name):
3232
dataset = cls
3333

3434
if dataset is None:
35-
print("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
36-
exit(0)
35+
raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
3736

3837
return dataset
3938

4039

4140
def get_option_setter(dataset_name):
42-
"""Return the modify_commandline_options of the dataset class."""
41+
"""Return the static method <modify_commandline_options> of the dataset class."""
4342
dataset_class = find_dataset_using_name(dataset_name)
4443
return dataset_class.modify_commandline_options
4544

4645

4746
def create_dataset(opt):
48-
"""Create dataloader given the option.
47+
"""Create dataset given the option.
4948
5049
This function wraps the class CustomDatasetDataLoader.
5150
This is the main interface called by train.py and test.py.
51+
52+
Example:
53+
from data import create_dataset
54+
dataset = create_dataset(opt)
5255
"""
5356
data_loader = CustomDatasetDataLoader(opt)
5457
dataset = data_loader.load_data()
@@ -58,10 +61,12 @@ def create_dataset(opt):
5861
class CustomDatasetDataLoader():
5962
"""Wrapper class of Dataset class that performs multi-threaded data loading"""
6063

61-
def name(self):
62-
return 'CustomDatasetDataLoader'
63-
6464
def __init__(self, opt):
65+
"""Initialize this class
66+
67+
It first creates a dataset instance given the name [dataset_mode]
68+
It then creates a multi-threaded data loader.
69+
"""
6570
self.opt = opt
6671
dataset_class = find_dataset_using_name(opt.dataset_mode)
6772
self.dataset = dataset_class(opt)
@@ -76,9 +81,11 @@ def load_data(self):
7681
return self
7782

7883
def __len__(self):
84+
"""Return the number of data in the dataset"""
7985
return min(len(self.dataset), self.opt.max_dataset_size)
8086

8187
def __iter__(self):
88+
"""Return a batch of data"""
8289
for i, data in enumerate(self.dataloader):
8390
if i * self.opt.batch_size >= self.opt.max_dataset_size:
8491
break

data/aligned_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def __getitem__(self, index):
4747
A = AB.crop((0, 0, w2, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
4848
B = AB.crop((w2, 0, w, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
4949
# apply the same cropping to both A and B
50-
if 'crop' in self.opt.resize_or_crop:
50+
if 'crop' in self.opt.preprocess:
5151
x, y, h, w = transforms.RandomCrop.get_params(A, output_size=[self.opt.crop_size, self.opt.crop_size])
5252
A = A.crop((x, y, w, h))
5353
B = B.crop((x, y, w, h))
@@ -62,5 +62,5 @@ def __getitem__(self, index):
6262
return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
6363

6464
def __len__(self):
65-
"""Return the total number of images."""
65+
"""Return the total number of images in the dataset."""
6666
return len(self.AB_paths)

data/base_dataset.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ def modify_commandline_options(parser, is_train):
1515

1616
@abstractmethod
1717
def __len__(self):
18+
"""Return the total number of images in the dataset."""
1819
return 0
1920

2021
@abstractmethod
@@ -23,25 +24,30 @@ def __getitem__(self, index):
2324

2425

2526
def get_transform(opt, grayscale=False, convert=True, crop=True, flip=True):
27+
"""Create a torchvision transformation function
28+
29+
The type of transformation is defined by option (e.g., [preprocess], [load_size], [crop_size])
30+
and can be overwritten by arguments such as [convert], [crop], and [flip]
31+
"""
2632
transform_list = []
2733
if grayscale:
2834
transform_list.append(transforms.Grayscale(1))
29-
if opt.resize_or_crop == 'resize_and_crop':
35+
if opt.preprocess == 'resize_and_crop':
3036
osize = [opt.load_size, opt.load_size]
3137
transform_list.append(transforms.Resize(osize, Image.BICUBIC))
3238
transform_list.append(transforms.RandomCrop(opt.crop_size))
33-
elif opt.resize_or_crop == 'crop' and crop:
39+
elif opt.preprocess == 'crop' and crop:
3440
transform_list.append(transforms.RandomCrop(opt.crop_size))
35-
elif opt.resize_or_crop == 'scale_width':
41+
elif opt.preprocess == 'scale_width':
3642
transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.crop_size)))
37-
elif opt.resize_or_crop == 'scale_width_and_crop':
43+
elif opt.preprocess == 'scale_width_and_crop':
3844
transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size)))
3945
if crop:
4046
transform_list.append(transforms.RandomCrop(opt.crop_size))
41-
elif opt.resize_or_crop == 'none':
47+
elif opt.preprocess == 'none':
4248
transform_list.append(transforms.Lambda(lambda img: __adjust(img)))
4349
else:
44-
raise ValueError('--resize_or_crop %s is not a valid option.' % opt.resize_or_crop)
50+
raise ValueError('--preprocess %s is not a valid option.' % opt.preprocess)
4551

4652
if not opt.no_flip and flip:
4753
transform_list.append(transforms.RandomHorizontalFlip())
@@ -75,7 +81,7 @@ def __adjust(img):
7581

7682

7783
def __scale_width(img, target_width):
78-
"""Resize images so that the output image width is the same as target width
84+
"""Resize images so that the width of the output image is the same as a target width
7985
8086
the size needs to be a multiple of 4,
8187
because going through generator network may change img size

docs/qa.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ The current code only works with PyTorch 0.4+. An earlier PyTorch version can of
3232
#### ValueError: empty range for randrange() ([#390](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/390), [#376](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/376), [#194](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/194))
3333
Similar error messages include "ConnectionRefusedError: [Errno 111] Connection refused"
3434

35-
It is related to data augmentation step. It often happens when you use `--resize_or_crop crop`. The program will crop random `crop_size x crop_size` patches out of the input training images. But if some of your image sizes (e.g., `256x384`) are smaller than the `crop_size` (e.g., 512), you will get this error. A simple fix will be to use other data augmentation methods such as `--resize_and_crop` or `scale_width_and_crop`. Our program will automatically resize the images according to `load_size` before apply `crop_size x crop_size` cropping. Make sure that `load_size >= crop_size`.
35+
It is related to the data augmentation step. It often happens when you use `--preprocess crop`. The program will crop random `crop_size x crop_size` patches out of the input training images. But if some of your image sizes (e.g., `256x384`) are smaller than the `crop_size` (e.g., 512), you will get this error. A simple fix is to use other data augmentation methods such as `--preprocess resize_and_crop` or `--preprocess scale_width_and_crop`. Our program will automatically resize the images according to `load_size` before applying `crop_size x crop_size` cropping. Make sure that `load_size >= crop_size`.
3636

3737

3838
#### Can I continue/resume my training? ([#350](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/350), [#275](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/275), [#234](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/234), [#87](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/87))
@@ -65,9 +65,9 @@ Yes, you can download pretrained models with the bash script `./scripts/download
6565
#### Out of memory ([#174](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/174))
6666
CycleGAN is more memory-intensive than pix2pix as it requires two generators and two discriminators. If you would like to produce high-resolution images, you can do the following.
6767

68-
- During training, train CycleGAN on cropped images of the training set. Please be careful not to change the aspect ratio or the scale of the original image, as this can lead to the training/test gap. You can usually do this by using `--resize_or_crop crop` option, or `--resize_or_crop scale_width_and_crop`.
68+
- During training, train CycleGAN on cropped images of the training set. Please be careful not to change the aspect ratio or the scale of the original image, as this can lead to the training/test gap. You can usually do this by using `--preprocess crop` option, or `--preprocess scale_width_and_crop`.
6969

70-
- Then at test time, you can load only one generator to produce the results in a single direction. This greatly saves GPU memory as you are not loading the discriminators and the other generator in the opposite direction. You can probably take the whole image as input. You can do this using `--model test --dataroot [path to the directory that contains your test images (e.g., ./datasets/horse2zebra/trainA)] --model_suffix _A --resize_or_crop none`. You can use either `--resize_or_crop none` or `--resize_or_crop scale_width --crop_size [your_desired_image_width]`. Please see the [model_suffix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/test_model.py#L16) and [resize_or_crop](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/data/base_dataset.py#L24) for more details.
70+
- Then at test time, you can load only one generator to produce the results in a single direction. This greatly saves GPU memory as you are not loading the discriminators and the other generator in the opposite direction. You can probably take the whole image as input. You can do this using `--model test --dataroot [path to the directory that contains your test images (e.g., ./datasets/horse2zebra/trainA)] --model_suffix _A --preprocess none`. You can use either `--preprocess none` or `--preprocess scale_width --crop_size [your_desired_image_width]`. Please see the [model_suffix](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/test_model.py#L16) and [preprocess](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/data/base_dataset.py#L24) for more details.
7171

7272
#### What is the identity loss? ([#322](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/322), [#373](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/issues/373), [#362](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/pull/362))
7373
We use the identity loss for our photo to painting application. The identity loss can regularize the generator to be close to an identity mapping when fed with real samples from the *target* domain. If something already looks like from the target domain, you should preserve the image without making additional changes. The generator trained with this loss will often be more conservative for unknown content. Please see more details in Sec 5.2 ''Photo generation from paintings'' and Figure 12 in the CycleGAN [paper](https://arxiv.org/pdf/1703.10593.pdf). The loss was first proposed in the Equation 6 of the prior work [[Taigman et al., 2017]](https://arxiv.org/pdf/1611.02200.pdf).

docs/tips.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Please set`--gpu_ids -1` to use CPU mode; set `--gpu_ids 0,1,2` for multi-GPU mo
88
During training, the current results can be viewed using two methods. First, if you set `--display_id` > 0, the results and loss plot will appear on a local graphics web server launched by [visdom](https://github.com/facebookresearch/visdom). To do this, you should have `visdom` installed and a server running by the command `python -m visdom.server`. The default server URL is `http://localhost:8097`. `display_id` corresponds to the window ID that is displayed on the `visdom` server. The `visdom` display functionality is turned on by default. To avoid the extra overhead of communicating with `visdom` set `--display_id -1`. Second, the intermediate results are saved to `[opt.checkpoints_dir]/[opt.name]/web/` as an HTML file. To avoid this, set `--no_html`.
99

1010
#### Preprocessing
11-
Images can be resized and cropped in different ways using `--resize_or_crop` option. The default option `'resize_and_crop'` resizes the image to be of size `(opt.load_size, opt.load_size)` and does a random crop of size `(opt.crop_size, opt.crop_size)`. `'crop'` skips the resizing step and only performs random cropping. `'scale_width'` resizes the image to have width `opt.crop_size` while keeping the aspect ratio. `'scale_width_and_crop'` first resizes the image to have width `opt.load_size` and then does random cropping of size `(opt.crop_size, opt.crop_size)`. `'none'` tries to skip all these preprocessing steps. However, if the image size is not a multiple of some number depending on the number of downsamplings of the generator, you will get an error because the size of the output image may be different from the size of the input image. Therefore, `'none'` option still tries to adjust the image size to be a multiple of 4. You might need a bigger adjustment if you change the generator architecture. Please see `data/base_datset.py` do see how all these were implemented.
11+
Images can be resized and cropped in different ways using the `--preprocess` option. The default option `'resize_and_crop'` resizes the image to be of size `(opt.load_size, opt.load_size)` and does a random crop of size `(opt.crop_size, opt.crop_size)`. `'crop'` skips the resizing step and only performs random cropping. `'scale_width'` resizes the image to have width `opt.crop_size` while keeping the aspect ratio. `'scale_width_and_crop'` first resizes the image to have width `opt.load_size` and then does random cropping of size `(opt.crop_size, opt.crop_size)`. `'none'` tries to skip all these preprocessing steps. However, if the image size is not a multiple of some number depending on the number of downsamplings of the generator, you will get an error because the size of the output image may be different from the size of the input image. Therefore, the `'none'` option still tries to adjust the image size to be a multiple of 4. You might need a bigger adjustment if you change the generator architecture. Please see `data/base_dataset.py` to see how all these were implemented.
1212

1313
#### Fine-tuning/resume training
1414
To fine-tune a pre-trained model, or resume the previous training, use the `--continue_train` flag. The program will then load the model based on `epoch`. By default, the program will initialize the epoch count as 1. Set `--epoch_count <int>` to specify a different starting epoch count.
@@ -33,10 +33,10 @@ This will combine each pair of images (A,B) into a single image file, ready for
3333

3434

3535
#### About image size
36-
Since the generator architecture in CycleGAN involves a series of downsampling / upsampling operations, the size of the input and output image may not match if the input image size is not a multiple of 4. As a result, you may get a runtime error because the L1 identity loss cannot be enforced with images of different size. Therefore, we slightly resize the image to become multiples of 4 even with `--resize_or_crop none` option. For the same reason, `--crop_size` needs to be a multiple of 4.
36+
Since the generator architecture in CycleGAN involves a series of downsampling / upsampling operations, the size of the input and output image may not match if the input image size is not a multiple of 4. As a result, you may get a runtime error because the L1 identity loss cannot be enforced with images of different sizes. Therefore, we slightly resize the image to become multiples of 4 even with the `--preprocess none` option. For the same reason, `--crop_size` needs to be a multiple of 4.
3737

3838
#### Training/Testing with high res images
39-
CycleGAN is quite memory-intensive as four networks (two generators and two discriminators) need to be loaded on one GPU, so a large image cannot be entirely loaded. In this case, we recommend training with cropped images. For example, to generate 1024px results, you can train with `--resize_or_crop scale_width_and_crop --load_size 1024 --crop_size 360`, and test with `--resize_or_crop scale_width --crop_size 1024`. This way makes sure the training and test will be at the same scale. At test time, you can afford higher resolution because you don’t need to load all networks.
39+
CycleGAN is quite memory-intensive as four networks (two generators and two discriminators) need to be loaded on one GPU, so a large image cannot be entirely loaded. In this case, we recommend training with cropped images. For example, to generate 1024px results, you can train with `--preprocess scale_width_and_crop --load_size 1024 --crop_size 360`, and test with `--preprocess scale_width --crop_size 1024`. This way makes sure the training and test will be at the same scale. At test time, you can afford higher resolution because you don’t need to load all networks.
4040

4141
#### About loss curve
4242
Unfortunately, the loss curve does not reveal much information in training GANs, and CycleGAN is no exception. To check whether the training has converged or not, we recommend periodically generating a few samples and looking at them.

models/base_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def __init__(self, opt):
1616
self.isTrain = opt.isTrain
1717
self.device = torch.device('cuda:{}'.format(self.gpu_ids[0])) if self.gpu_ids else torch.device('cpu')
1818
self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
19-
if opt.resize_or_crop != 'scale_width':
19+
if opt.preprocess != 'scale_width': # with preprocessing option [scale_width], the input image might have different sizes, which will hurt the performance of cudnn.benchmark.
2020
torch.backends.cudnn.benchmark = True
2121
self.loss_names = []
2222
self.model_names = []

0 commit comments

Comments
 (0)