
Commit f27da7d

Refactored the Dataset code to support different preprocess options for pix2pix

1 parent 5762f5b · commit f27da7d
File tree

5 files changed: +92 additions, −109 deletions

data/aligned_dataset.py
data/base_dataset.py
data/colorization_dataset.py
data/single_dataset.py
data/unaligned_dataset.py

data/aligned_dataset.py

Lines changed: 14 additions & 21 deletions

@@ -1,6 +1,6 @@
 import os.path
 import random
-from data.base_dataset import BaseDataset, get_transform
+from data.base_dataset import BaseDataset, get_params, get_transform
 import torchvision.transforms as transforms
 from data.image_folder import make_dataset
 from PIL import Image
@@ -23,12 +23,8 @@ def __init__(self, opt):
         self.dir_AB = os.path.join(opt.dataroot, opt.phase)  # get the image directory
         self.AB_paths = sorted(make_dataset(self.dir_AB, opt.max_dataset_size))  # get image paths
         assert(self.opt.load_size >= self.opt.crop_size)  # crop_size should be smaller than the size of loaded image
-        input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
-        output_nc = self.opt.input_nc if self.opt.direction == 'BtoA' else self.opt.output_nc
-        # we manually crop and flip in __getitem__ to make sure we apply the same crop and flip for image A and B
-        # we disable the cropping and flipping in the function get_transform
-        self.transform_A = get_transform(opt, grayscale=(input_nc == 1), crop=False, flip=False)
-        self.transform_B = get_transform(opt, grayscale=(output_nc == 1), crop=False, flip=False)
+        self.input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
+        self.output_nc = self.opt.input_nc if self.opt.direction == 'BtoA' else self.opt.output_nc

     def __getitem__(self, index):
         """Return a data point and its metadata information.
@@ -48,20 +44,17 @@ def __getitem__(self, index):
         # split AB image into A and B
         w, h = AB.size
         w2 = int(w / 2)
-        A = AB.crop((0, 0, w2, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
-        B = AB.crop((w2, 0, w, h)).resize((self.opt.load_size, self.opt.load_size), Image.BICUBIC)
-        # apply the same cropping to both A and B
-        if 'crop' in self.opt.preprocess:
-            x, y, h, w = transforms.RandomCrop.get_params(A, output_size=[self.opt.crop_size, self.opt.crop_size])
-            A = A.crop((x, y, w, h))
-            B = B.crop((x, y, w, h))
-        # apply the same flipping to both A and B
-        if (not self.opt.no_flip) and random.random() < 0.5:
-            A = A.transpose(Image.FLIP_LEFT_RIGHT)
-            B = B.transpose(Image.FLIP_LEFT_RIGHT)
-        # call standard transformation function
-        A = self.transform_A(A)
-        B = self.transform_B(B)
+        A = AB.crop((0, 0, w2, h))
+        B = AB.crop((w2, 0, w, h))
+
+        # apply the same transform to both A and B
+        transform_params = get_params(self.opt, A.size)
+        A_transform = get_transform(self.opt, transform_params, grayscale=(self.input_nc == 1))
+        B_transform = get_transform(self.opt, transform_params, grayscale=(self.output_nc == 1))
+
+        A = A_transform(A)
+        B = B_transform(B)

         return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}

     def __len__(self):
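
Worth spelling out, since it is the heart of the commit: the old code disabled cropping and flipping inside get_transform and re-implemented both by hand in __getitem__ to keep A and B aligned; the new code samples the random parameters once via get_params and builds two deterministic transforms from them, so any --preprocess mode stays pair-consistent. A minimal sketch of the pattern (the opt namespace and image path are illustrative stand-ins, not from the commit):

# Sketch of the new aligned-transform pattern; `opt` and the image path are
# illustrative stand-ins, not part of the commit.
from argparse import Namespace

from PIL import Image

from data.base_dataset import get_params, get_transform

opt = Namespace(preprocess='resize_and_crop', load_size=286,
                crop_size=256, no_flip=False)

AB = Image.open('datasets/facades/train/1.jpg').convert('RGB')  # hypothetical path
w, h = AB.size
A = AB.crop((0, 0, w // 2, h))       # left half: input
B = AB.crop((w // 2, 0, w, h))       # right half: target

params = get_params(opt, A.size)     # sample crop position and flip ONCE
A = get_transform(opt, params)(A)    # both transforms replay the same
B = get_transform(opt, params)(B)    # crop_pos and flip decision

Because all randomness now lives in get_params, get_transform becomes a pure function of (opt, params), which is what lets AlignedDataset, SingleDataset, and UnalignedDataset share it.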

data/base_dataset.py

Lines changed: 57 additions & 74 deletions

@@ -2,6 +2,8 @@

 It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses.
 """
+import random
+import numpy as np
 import torch.utils.data as data
 from PIL import Image
 import torchvision.transforms as transforms
@@ -58,103 +60,84 @@ def __getitem__(self, index):
         pass


-def get_transform(opt, grayscale=False, convert=True, crop=True, flip=True):
-    """Create a torchvision transformation function
-
-    The type of transformation is defined by option (e.g., [opt.preprocess], [opt.load_size], [opt.crop_size])
-    and can be overwritten by arguments such as [convert], [crop], and [flip]
-
-    Parameters:
-        opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
-        grayscale (bool) -- if convert input RGB image to a grayscale image
-        convert (bool) -- if convert an image to a tensor array betwen [-1, 1]
-        crop (bool) -- if apply cropping
-        flip (bool) -- if apply horizontal flippling
-    """
+def get_params(opt, size):
+    w, h = size
+    new_h = h
+    new_w = w
+    if opt.preprocess == 'resize_and_crop':
+        new_h = new_w = opt.load_size
+    elif opt.preprocess == 'scale_width_and_crop':
+        new_w = opt.load_size
+        new_h = opt.load_size * h // w
+
+    x = random.randint(0, np.maximum(0, new_w - opt.crop_size))
+    y = random.randint(0, np.maximum(0, new_h - opt.crop_size))
+
+    flip = random.random() > 0.5
+
+    return {'crop_pos': (x, y), 'flip': flip}
+
+
+def get_transform(opt, params, grayscale=False, method=Image.BICUBIC, convert=True):
     transform_list = []
     if grayscale:
         transform_list.append(transforms.Grayscale(1))
-    if opt.preprocess == 'resize_and_crop':
+    if 'resize' in opt.preprocess:
         osize = [opt.load_size, opt.load_size]
-        transform_list.append(transforms.Resize(osize, Image.BICUBIC))
-        transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.preprocess == 'crop' and crop:
-        transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.preprocess == 'scale_width':
-        transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.crop_size)))
-    elif opt.preprocess == 'scale_width_and_crop':
-        transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size)))
-        if crop:
-            transform_list.append(transforms.RandomCrop(opt.crop_size))
-    elif opt.preprocess == 'none':
-        transform_list.append(transforms.Lambda(lambda img: __adjust(img)))
-    else:
-        raise ValueError('--preprocess %s is not a valid option.' % opt.preprocess)
-
-    if not opt.no_flip and flip:
-        transform_list.append(transforms.RandomHorizontalFlip())
+        transform_list.append(transforms.Scale(osize, method))
+    elif 'scale_width' in opt.preprocess:
+        transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size, method)))
+
+    if 'crop' in opt.preprocess:
+        transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_pos'], opt.crop_size)))
+
+    if opt.preprocess == 'none':
+        base = 4
+        transform_list.append(transforms.Lambda(lambda img: __make_power_2(img, base, method)))
+
+    if not opt.no_flip and params['flip']:
+        transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip'])))

     if convert:
         transform_list += [transforms.ToTensor(),
                            transforms.Normalize((0.5, 0.5, 0.5),
                                                 (0.5, 0.5, 0.5))]
     return transforms.Compose(transform_list)


-def __adjust(img):
-    """Modify the width and height to be multiple of 4.
-
-    Parameters:
-        img (PIL image) -- input image
-
-    Returns a modified image whose width and height are mulitple of 4.
-
-    the size needs to be a multiple of 4,
-    because going through generator network may change img size
-    and eventually cause size mismatch error
-    """
+def __make_power_2(img, base, method=Image.BICUBIC):
     ow, oh = img.size
-    mult = 4
-    if ow % mult == 0 and oh % mult == 0:
+    h = int(round(oh / base) * base)
+    w = int(round(ow / base) * base)
+    if (h == oh) and (w == ow):
         return img
-    w = (ow - 1) // mult
-    w = (w + 1) * mult
-    h = (oh - 1) // mult
-    h = (h + 1) * mult
-
-    if ow != w or oh != h:
-        __print_size_warning(ow, oh, w, h)
-
-    return img.resize((w, h), Image.BICUBIC)
-

-def __scale_width(img, target_width):
-    """Resize images so that the width of the output image is the same as a target width
-
-    Parameters:
-        img (PIL image) -- input image
-        target_width (int) -- target image width
-
-    Returns a modified image whose width matches the target image width;
-
-    the size needs to be a multiple of 4,
-    because going through generator network may change img size
-    and eventually cause size mismatch error
-    """
+    __print_size_warning(ow, oh, w, h)
+    return img.resize((w, h), method)
+
+
+def __scale_width(img, target_width, method=Image.BICUBIC):
     ow, oh = img.size
-
-    mult = 4
-    assert target_width % mult == 0, "the target width needs to be multiple of %d." % mult
-    if (ow == target_width and oh % mult == 0):
+    if (ow == target_width):
         return img
     w = target_width
-    target_height = int(target_width * oh / ow)
-    m = (target_height - 1) // mult
-    h = (m + 1) * mult
-
-    if target_height != h:
-        __print_size_warning(target_width, target_height, w, h)
-
-    return img.resize((w, h), Image.BICUBIC)
+    h = int(target_width * oh / ow)
+    return img.resize((w, h), method)
+
+
+def __crop(img, pos, size):
+    ow, oh = img.size
+    x1, y1 = pos
+    tw = th = size
+    if (ow > tw or oh > th):
+        return img.crop((x1, y1, x1 + tw, y1 + th))
+    return img
+
+
+def __flip(img, flip):
+    if flip:
+        return img.transpose(Image.FLIP_LEFT_RIGHT)
+    return img


 def __print_size_warning(ow, oh, w, h):
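
get_params is now the single source of randomness: it computes the post-resize dimensions implied by opt.preprocess, then draws a crop position and a flip decision for get_transform to replay. One subtlety: if the scaled image ends up smaller than crop_size along an axis, np.maximum(0, ...) clamps that crop coordinate to 0. An illustrative check, with example numbers that are assumptions rather than anything in the commit:

# Illustrative check of get_params; the sizes below are example assumptions.
from argparse import Namespace

from data.base_dataset import get_params

opt = Namespace(preprocess='scale_width_and_crop', load_size=286, crop_size=256)
params = get_params(opt, size=(600, 400))   # a hypothetical 600x400 source image

# scale_width gives new_w = 286 and new_h = 286 * 400 // 600 = 190.
# 190 < crop_size, so np.maximum(0, 190 - 256) clamps y to 0;
# x is drawn uniformly from [0, 286 - 256] = [0, 30].
assert params['crop_pos'][1] == 0
assert 0 <= params['crop_pos'][0] <= 30
assert params['flip'] in (True, False)

(Separately, the new get_transform uses transforms.Scale, which is the older torchvision name for transforms.Resize and is deprecated in later releases.)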

data/colorization_dataset.py

Lines changed: 4 additions & 3 deletions

@@ -1,5 +1,5 @@
 import os.path
-from data.base_dataset import BaseDataset, get_transform
+from data.base_dataset import BaseDataset, get_params, get_transform
 from data.image_folder import make_dataset
 from skimage import color  # require skimage
 from PIL import Image
@@ -39,7 +39,6 @@ def __init__(self, opt):
         self.dir = os.path.join(opt.dataroot)
         self.AB_paths = sorted(make_dataset(self.dir, opt.max_dataset_size))
         assert(opt.input_nc == 1 and opt.output_nc == 2 and opt.direction == 'AtoB')
-        self.transform = get_transform(opt, convert=False)

     def __getitem__(self, index):
         """Return a data point and its metadata information.
@@ -55,7 +54,9 @@ def __getitem__(self, index):
         """
         path = self.AB_paths[index]
         im = Image.open(path).convert('RGB')
-        im = self.transform(im)
+        transform_params = get_params(self.opt, im.size)
+        transform = get_transform(self.opt, transform_params, convert=False)
+        im = transform(im)
         im = np.array(im)
         lab = color.rgb2lab(im).astype(np.float32)
         lab_t = transforms.ToTensor()(lab)
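
With convert=False, the composed transform skips ToTensor and Normalize and still returns a PIL image, which the dataset then converts to Lab via skimage. A condensed, self-contained sketch of that per-item pipeline (opt and path are hypothetical stand-ins, not from the commit):

# Condensed sketch of the per-item colorization pipeline; `opt` and `path`
# are illustrative stand-ins, not from the commit.
from argparse import Namespace

import numpy as np
import torchvision.transforms as transforms
from PIL import Image
from skimage import color

from data.base_dataset import get_params, get_transform

opt = Namespace(preprocess='resize_and_crop', load_size=286,
                crop_size=256, no_flip=False)
path = 'datasets/colorization/train/1.jpg'   # hypothetical path

im = Image.open(path).convert('RGB')
transform = get_transform(opt, get_params(opt, im.size), convert=False)
im = np.array(transform(im))                 # H x W x 3 uint8, still RGB
lab = color.rgb2lab(im).astype(np.float32)   # L in [0, 100]; a, b roughly [-110, 110]
lab_t = transforms.ToTensor()(lab)           # 3 x H x W float tensor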

data/single_dataset.py

Lines changed: 6 additions & 4 deletions

@@ -1,4 +1,4 @@
-from data.base_dataset import BaseDataset, get_transform
+from data.base_dataset import BaseDataset, get_params, get_transform
 from data.image_folder import make_dataset
 from PIL import Image

@@ -17,8 +17,8 @@ def __init__(self, opt):
         """
         BaseDataset.__init__(self, opt)
         self.A_paths = sorted(make_dataset(opt.dataroot, opt.max_dataset_size))
-        input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
-        self.transform = get_transform(opt, input_nc == 1)
+        self.input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
+        # self.transform = get_transform(opt, input_nc == 1)

     def __getitem__(self, index):
         """Return a data point and its metadata information.
@@ -32,7 +32,9 @@ def __getitem__(self, index):
         """
         A_path = self.A_paths[index]
         A_img = Image.open(A_path).convert('RGB')
-        A = self.transform(A_img)
+        transform_params = get_params(self.opt, A_img.size)
+        transform = get_transform(self.opt, transform_params, grayscale=(self.input_nc == 1))
+        A = transform(A_img)
         return {'A': A, 'A_paths': A_path}

     def __len__(self):
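
Since the transform is now rebuilt from freshly sampled parameters on every __getitem__ call, two reads of the same index can legitimately differ whenever cropping or flipping is active. A hypothetical smoke test of that property (the opt values and dataroot below are assumptions, not from the commit):

# Hypothetical smoke test, not part of the commit: per-item parameter
# sampling means repeated reads of one index may differ.
from argparse import Namespace

import torch

from data.single_dataset import SingleDataset

opt = Namespace(dataroot='datasets/facades/testA',   # hypothetical path
                max_dataset_size=float('inf'), direction='AtoB',
                input_nc=3, output_nc=3, preprocess='resize_and_crop',
                load_size=286, crop_size=256, no_flip=False)

ds = SingleDataset(opt)
a0 = ds[0]['A']
a1 = ds[0]['A']
# May print False while cropping/flipping is active; must print True
# with opt.preprocess == 'none' and opt.no_flip == True.
print(torch.equal(a0, a1))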

data/unaligned_dataset.py

Lines changed: 11 additions & 7 deletions

@@ -1,5 +1,5 @@
 import os.path
-from data.base_dataset import BaseDataset, get_transform
+from data.base_dataset import BaseDataset, get_params, get_transform
 from data.image_folder import make_dataset
 from PIL import Image
 import random
@@ -31,10 +31,8 @@ def __init__(self, opt):
         self.A_size = len(self.A_paths)  # get the size of dataset A
         self.B_size = len(self.B_paths)  # get the size of dataset B
         btoA = self.opt.direction == 'BtoA'
-        input_nc = self.opt.output_nc if btoA else self.opt.input_nc       # get the number of channels of input image
-        output_nc = self.opt.input_nc if btoA else self.opt.output_nc      # get the number of channels of output image
-        self.transform_A = get_transform(opt, grayscale=(input_nc == 1))   # if nc == 1, we convert RGB to grayscale image
-        self.transform_B = get_transform(opt, grayscale=(output_nc == 1))  # if nc == 1, we convert RGB to grayscale image
+        self.input_nc = self.opt.output_nc if btoA else self.opt.input_nc   # get the number of channels of input image
+        self.output_nc = self.opt.input_nc if btoA else self.opt.output_nc  # get the number of channels of output image

     def __getitem__(self, index):
         """Return a data point and its metadata information.
@@ -57,8 +55,14 @@ def __getitem__(self, index):
         A_img = Image.open(A_path).convert('RGB')
         B_img = Image.open(B_path).convert('RGB')
         # apply image transformation
-        A = self.transform_A(A_img)
-        B = self.transform_B(B_img)
+        A_transform_params = get_params(self.opt, A_img.size)
+        A_transform = get_transform(self.opt, A_transform_params, grayscale=(self.input_nc == 1))
+        A = A_transform(A_img)
+
+        B_transform_params = get_params(self.opt, B_img.size)
+        B_transform = get_transform(self.opt, B_transform_params, grayscale=(self.output_nc == 1))
+        B = B_transform(B_img)
+
         return {'A': A, 'B': B, 'A_paths': A_path, 'B_paths': B_path}

     def __len__(self):
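
The contrast with AlignedDataset is the point here: unpaired A and B each draw their own parameters, so crops and flips are sampled independently, which is the right behavior for CycleGAN-style data that is not pixel-aligned. A side-by-side sketch of the two patterns (images and opt are hypothetical):

# Side-by-side of the two sampling patterns; images and opt are hypothetical.
from argparse import Namespace

from PIL import Image

from data.base_dataset import get_params, get_transform

opt = Namespace(preprocess='resize_and_crop', load_size=286,
                crop_size=256, no_flip=False)
A_img = Image.new('RGB', (300, 300))   # stand-ins for real dataset images
B_img = Image.new('RGB', (300, 300))

# AlignedDataset: one parameter draw, replayed on both halves of the pair.
shared = get_params(opt, A_img.size)
A = get_transform(opt, shared)(A_img)
B = get_transform(opt, shared)(B_img)

# UnalignedDataset: independent draws, because A and B are not pixel-aligned.
A = get_transform(opt, get_params(opt, A_img.size))(A_img)
B = get_transform(opt, get_params(opt, B_img.size))(B_img)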
