
Commit cbe0857

improve dataset class: max_dataset_size and aligned_dataset
1 parent 8a76751 commit cbe0857

9 files changed: +76 -57 lines

data/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+"""This package """
 import importlib
 import torch.utils.data
 from data.base_dataset import BaseDataset

data/aligned_dataset.py

Lines changed: 41 additions & 19 deletions
@@ -1,45 +1,67 @@
 import os.path
 import random
-from data.base_dataset import BaseDataset, get_simple_transform
-import torchvision.transforms.functional as TF
+from data.base_dataset import BaseDataset, get_transform
 import torchvision.transforms as transforms
 from data.image_folder import make_dataset
 from PIL import Image
 
 
 class AlignedDataset(BaseDataset):
-    @staticmethod
-    def modify_commandline_options(parser, is_train):
-        return parser
+    """A dataset class for paired image dataset.
+
+    It assumes that the directory '/path/to/data/train' contains image pairs in the form of {A,B}.
+    During test time, you need to prepare a directory /path/to/data/test.
+    """
 
     def __init__(self, opt):
+        """Initialize this dataset class."""
         BaseDataset.__init__(self, opt)
-        self.dir_AB = os.path.join(opt.dataroot, opt.phase)
-        self.AB_paths = sorted(make_dataset(self.dir_AB))
-        assert(opt.resize_or_crop == 'resize_and_crop')  # only support this mode
-        assert(self.opt.load_size >= self.opt.crop_size)
+        self.dir_AB = os.path.join(opt.dataroot, opt.phase)  # get the image directory
+        self.AB_paths = sorted(make_dataset(self.dir_AB, opt.max_dataset_size))  # get image paths
+        assert(opt.resize_or_crop == 'resize_and_crop')  # only support this mode
+        assert(self.opt.load_size >= self.opt.crop_size)  # crop_size should be smaller than the size of loaded image
         input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
         output_nc = self.opt.input_nc if self.opt.direction == 'BtoA' else self.opt.output_nc
-        self.transform_A = get_simple_transform(grayscale=(input_nc == 1))
-        self.transform_B = get_simple_transform(grayscale=(output_nc == 1))
+        # we manually crop and flip in __getitem__ to make sure we apply the same crop and flip for image A and B
+        # we disable the cropping and flipping in the function get_transform
+        self.transform_A = get_transform(opt, grayscale=(input_nc == 1), crop=False, flip=False)
+        self.transform_B = get_transform(opt, grayscale=(output_nc == 1), crop=False, flip=False)
 
     def __getitem__(self, index):
+        """Return a data point and its metadata information.
+
+        Parameters:
+            index -- a random integer for data indexing
+
+        Returns a dictionary of A, B, A_paths and B_paths
+            A (tensor)    -- an image in the input domain
+            B (tensor)    -- its corresponding image in the target domain
+            A_paths (str) -- image paths
+            B_paths (str) -- image paths
+        """
+        # read an image given a random integer index
         AB_path = self.AB_paths[index]
        AB = Image.open(AB_path).convert('RGB')
+        # split AB image into A and B
         w, h = AB.size
         w2 = int(w / 2)
-        A0 = AB.crop((0, 0, w2, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
-        B0 = AB.crop((w2, 0, w, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
-        x, y, h, w = transforms.RandomCrop.get_params(A0, output_size=[self.opt.crop_size, self.opt.crop_size])
-        A = TF.crop(A0, x, y, h, w)
-        B = TF.crop(B0, x, y, h, w)
-
+        A = AB.crop((0, 0, w2, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
+        B = AB.crop((w2, 0, w, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
+        # apply the same cropping to both A and B
+        if 'crop' in self.opt.resize_or_crop:
+            x, y, h, w = transforms.RandomCrop.get_params(A, output_size=[self.opt.crop_size, self.opt.crop_size])
+            A = A.crop((x, y, w, h))
+            B = B.crop((x, y, w, h))
+        # apply the same flipping to both A and B
         if (not self.opt.no_flip) and random.random() < 0.5:
-            A = TF.hflip(A)
-            B = TF.hflip(B)
+            A = A.transpose(Image.FLIP_LEFT_RIGHT)
+            B = B.transpose(Image.FLIP_LEFT_RIGHT)
+        # call standard transformation function
         A = self.transform_A(A)
         B = self.transform_B(B)
+        print(AB_path, index)
         return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
 
     def __len__(self):
+        """Return the total number of images."""
        return len(self.AB_paths)
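
The key idea above is that the same random crop and the same random flip must be applied to A and B, or the pairs fall out of alignment. A minimal standalone sketch of that pattern (not the repository's exact code; it assumes A and B have equal size and computes the crop box explicitly so both outputs are exactly crop_size x crop_size):

import random
from PIL import Image

def paired_crop_and_flip(A, B, crop_size, flip=True):
    # draw one crop position and reuse it for both images
    w, h = A.size
    left = random.randint(0, w - crop_size)
    upper = random.randint(0, h - crop_size)
    box = (left, upper, left + crop_size, upper + crop_size)  # PIL box: (left, upper, right, lower)
    A, B = A.crop(box), B.crop(box)
    # draw one coin flip and reuse it for both images
    if flip and random.random() < 0.5:
        A = A.transpose(Image.FLIP_LEFT_RIGHT)
        B = B.transpose(Image.FLIP_LEFT_RIGHT)
    return A, B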

data/base_dataset.py

Lines changed: 18 additions & 21 deletions
@@ -22,27 +22,28 @@ def __getitem__(self, index):
         pass
 
 
-def get_transform(opt, grayscale=False, convert=True):
+def get_transform(opt, grayscale=False, convert=True, crop=True, flip=True):
     transform_list = []
     if grayscale:
         transform_list.append(transforms.Grayscale(1))
     if opt.resize_or_crop == 'resize_and_crop':
         osize = [opt.load_size, opt.load_size]
         transform_list.append(transforms.Resize(osize, Image.BICUBIC))
         transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.resize_or_crop == 'crop':
+    elif opt.resize_or_crop == 'crop' and crop:
         transform_list.append(transforms.RandomCrop(opt.crop_size))
     elif opt.resize_or_crop == 'scale_width':
         transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.crop_size)))
     elif opt.resize_or_crop == 'scale_width_and_crop':
         transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size)))
-        transform_list.append(transforms.RandomCrop(opt.crop_size))
+        if crop:
+            transform_list.append(transforms.RandomCrop(opt.crop_size))
     elif opt.resize_or_crop == 'none':
         transform_list.append(transforms.Lambda(lambda img: __adjust(img)))
     else:
         raise ValueError('--resize_or_crop %s is not a valid option.' % opt.resize_or_crop)
 
-    if not opt.no_flip:
+    if not opt.no_flip and flip:
         transform_list.append(transforms.RandomHorizontalFlip())
     if convert:
         transform_list += [transforms.ToTensor(),
@@ -51,22 +52,14 @@ def get_transform(opt, grayscale=False, convert=True):
     return transforms.Compose(transform_list)
 
 
-def get_simple_transform(grayscale=False):
-    transform_list = []
-    if grayscale:
-        transform_list.append(transforms.Grayscale(1))
-    transform_list += [transforms.ToTensor(),
-                       transforms.Normalize((0.5, 0.5, 0.5),
-                                            (0.5, 0.5, 0.5))]
-    return transforms.Compose(transform_list)
-
-
 def __adjust(img):
-    """Modify the width and height to be multiple of 4"""
+    """Modify the width and height to be multiple of 4
+
+    the size needs to be a multiple of 4,
+    because going through generator network may change img size
+    and eventually cause size mismatch error
+    """
     ow, oh = img.size
-    # the size needs to be a multiple of this number,
-    # because going through generator network may change img size
-    # and eventually cause size mismatch error
     mult = 4
     if ow % mult == 0 and oh % mult == 0:
         return img
@@ -82,11 +75,14 @@ def __adjust(img):
 
 
 def __scale_width(img, target_width):
+    """Resize images so that the output image width is the same as target width
+
+    the size needs to be a multiple of 4,
+    because going through generator network may change img size
+    and eventually cause size mismatch error
+    """
     ow, oh = img.size
 
-    # the size needs to be a multiple of this number,
-    # because going through generator network may change img size
-    # and eventually cause size mismatch error
     mult = 4
     assert target_width % mult == 0, "the target width needs to be multiple of %d." % mult
     if (ow == target_width and oh % mult == 0):
@@ -103,6 +99,7 @@ def __scale_width(img, target_width):
 
 
 def __print_size_warning(ow, oh, w, h):
+    """Print warning information about image size (only print once)"""
     if not hasattr(__print_size_warning, 'has_printed'):
         print("The image size needs to be a multiple of 4. "
               "The loaded image size was (%d, %d), so it was adjusted to "

data/colorization_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ def modify_commandline_options(parser, is_train):
     def __init__(self, opt):
         BaseDataset.__init__(self, opt)
         self.dir_A = os.path.join(opt.dataroot)
-        self.A_paths = sorted(make_dataset(self.dir_A))
+        self.A_paths = sorted(make_dataset(self.dir_A, opt.max_dataset_size))
         assert(opt.input_nc == 1 and opt.output_nc == 2 and opt.direction == 'AtoB')
         self.transform = get_transform(opt, convert=False)

data/image_folder.py

Lines changed: 2 additions & 3 deletions
@@ -20,7 +20,7 @@ def is_image_file(filename):
     return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
 
 
-def make_dataset(dir):
+def make_dataset(dir, max_dataset_size=float("inf")):
     images = []
     assert os.path.isdir(dir), '%s is not a valid directory' % dir
 
@@ -29,8 +29,7 @@ def make_dataset(dir):
             if is_image_file(fname):
                 path = os.path.join(root, fname)
                 images.append(path)
-
-    return images
+    return images[:min(max_dataset_size, len(images))]
 
 
 def default_loader(path):
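
Existing callers are unaffected because the new parameter defaults to float("inf"); the dataset classes above now pass opt.max_dataset_size through. A short usage sketch (the dataset path is hypothetical):

from data.image_folder import make_dataset

# default: collect every image under the directory
all_paths = sorted(make_dataset('./datasets/facades/train'))

# cap the dataset at 100 images, e.g. for a quick smoke-test run
few_paths = sorted(make_dataset('./datasets/facades/train', max_dataset_size=100))
assert len(few_paths) <= 100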

data/single_dataset.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ def modify_commandline_options(parser, is_train):
 
     def __init__(self, opt):
         BaseDataset.__init__(self, opt)
-        self.A_paths = sorted(make_dataset(opt.dataroot))
+        self.A_paths = sorted(make_dataset(opt.dataroot, opt.max_dataset_size))
         input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
         self.transform = get_transform(opt, input_nc == 1)

data/template_dataset.py

Lines changed: 2 additions & 2 deletions
@@ -46,7 +46,7 @@ def __init__(self, opt):
         # save the option and dataset root
         BaseDataset.__init__(self, opt)
         # get the image paths of your dataset;
-        self.image_paths = []  # You can call <sorted(make_dataset(self.root))> to get all the image paths under the directory self.root
+        self.image_paths = []  # You can call <sorted(make_dataset(self.root, opt.max_dataset_size))> to get all the image paths under the directory self.root
         # define the default transform function. You can use <base_dataset.get_transform>; You can also define your custom transform function
         self.transform = get_transform(opt)
 
@@ -57,7 +57,7 @@ def __getitem__(self, index):
             index -- a random integer for data indexing
 
         Returns:
-            a dicrtionary of data with their names. It ususally contains the data itself and its metadata information.
+            a dictionary of data with their names. It usually contains the data itself and its metadata information.
 
         Step 1: get a random image path: e.g., path = self.image_paths[index]
         Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB').
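
Putting the template's steps together, a hypothetical minimal dataset might look like the sketch below; the class name, paths, and ToTensor transform are illustrative, not part of the repository:

from PIL import Image
import torchvision.transforms as transforms

class MinimalDataset:
    """A hypothetical dataset following the template's steps."""

    def __init__(self, image_paths):
        self.image_paths = image_paths          # the image paths of your dataset
        self.transform = transforms.ToTensor()  # a stand-in for get_transform(opt)

    def __getitem__(self, index):
        path = self.image_paths[index]            # Step 1: get a random image path
        image = Image.open(path).convert('RGB')   # Step 2: load the data from disk
        data = self.transform(image)              # convert it to a PyTorch tensor
        return {'data': data, 'path': path}       # return a dictionary of data

    def __len__(self):
        return len(self.image_paths)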

data/unaligned_dataset.py

Lines changed: 2 additions & 5 deletions
@@ -15,11 +15,8 @@ def __init__(self, opt):
         self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
         self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')
 
-        self.A_paths = make_dataset(self.dir_A)
-        self.B_paths = make_dataset(self.dir_B)
-
-        self.A_paths = sorted(self.A_paths)
-        self.B_paths = sorted(self.B_paths)
+        self.A_paths = sorted(make_dataset(self.dir_A, opt.max_dataset_size))
+        self.B_paths = sorted(make_dataset(self.dir_B, opt.max_dataset_size))
         self.A_size = len(self.A_paths)
         self.B_size = len(self.B_paths)
         btoA = self.opt.direction == 'BtoA'

util/visualizer.py

Lines changed: 8 additions & 5 deletions
@@ -3,8 +3,8 @@
 import sys
 import ntpath
 import time
-from . import util
-from . import html
+from . import util, html
+from subprocess import Popen, PIPE
 from scipy.misc import imresize
 
 if sys.version_info[0] == 2:
@@ -41,11 +41,12 @@ def save_images(webpage, visuals, image_path, aspect_ratio=1.0, width=256):
 
 class Visualizer():
     def __init__(self, opt):
+        self.opt = opt
         self.display_id = opt.display_id
         self.use_html = opt.isTrain and not opt.no_html
         self.win_size = opt.display_winsize
         self.name = opt.name
-        self.opt = opt
+        self.port = opt.display_port
         self.saved = False
         if self.display_id > 0:
             import visdom
@@ -66,8 +67,10 @@ def reset(self):
         self.saved = False
 
     def throw_visdom_connection_error(self):
-        print('\n\nCould not connect to Visdom server (https://github.com/facebookresearch/visdom) for displaying training progress.\nYou can suppress connection to Visdom using the option --display_id -1. To install visdom, run \n$ pip install visdom\n, and start the server by \n$ python -m visdom.server.\n\n')
-        exit(1)
+        cmd = sys.executable + ' -m visdom.server -p %d &>/dev/null &' % self.port
+        print('\n\nCould not connect to Visdom server. \n Trying to start a server....')
+        print('Command: %s' % cmd)
+        Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
 
     # |visuals|: dictionary of images to display or save
     def display_current_results(self, visuals, epoch, save_result):
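
Rather than exiting when the Visdom server is unreachable, the new handler tries to launch one in the background on the configured display port. A standalone sketch of the same pattern (the default port 8097 is an assumption, matching visdom's usual default):

import sys
from subprocess import Popen, PIPE

def start_visdom_server(port=8097):
    # run the server with the current interpreter so it uses the same environment
    cmd = sys.executable + ' -m visdom.server -p %d &>/dev/null &' % port
    print('Could not connect to Visdom server. Trying to start a server....')
    print('Command: %s' % cmd)
    # fire and forget; the caller can retry its connection afterwards
    Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)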
