forked from sarahmu/Histopathology-Imaging
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_loader.py
More file actions
72 lines (67 loc) · 3.48 KB
/
data_loader.py
File metadata and controls
72 lines (67 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
from PIL import Image
import os
import random
class Dataset(object):
    """
    Iterable over minibatches of grayscale images and, optionally, their color counterparts.

    Iterating over a Dataset yields tuples (gray_batch, color_batch). color_batch is None when
    the Dataset was built without a color directory.
    """

    def __init__(self, gray_dir, color_dir, batch_size, img_dim, color_channels=4, shuffle=False, data_type='float32'):
        """
        Construct a Dataset object to iterate over grayscale images and their corresponding color images, if any

        Inputs:
        - gray_dir: String for the directory storing the gray images, with or without the ending slash
        - color_dir: String for the directory storing the color images, with or without the ending slash.
                     None if there are no groundtruth color images (at testing time)
        - batch_size: Integer giving number of images per minibatch
        - img_dim: Integer for the width and height of the images
        - color_channels: Integer for the number of channels in the color images
        - shuffle: Boolean for shuffling the data on each epoch
        - data_type: String for specifying the numpy data type to convert to for the images
        """
        self.gray_dir = gray_dir
        self.color_dir = color_dir
        self.batch_size = batch_size
        self.img_dim = img_dim
        self.color_channels = color_channels
        self.shuffle = shuffle
        self.data_type = data_type
        self.gray_img_names = self._list_png_names(gray_dir)
        if color_dir is None:
            self.color_img_names = None
        else:
            self.color_img_names = self._list_png_names(color_dir)

    @staticmethod
    def _list_png_names(directory):
        """Return the sorted names of the entries in `directory` that end with '.png'."""
        # os.listdir returns names in arbitrary order; sorting makes the non-shuffled
        # iteration order reproducible across runs and platforms.
        return sorted(name for name in os.listdir(directory) if name.endswith('.png'))

    def img_to_np(self, filename, img_type):
        """
        Convert a single image file to a numpy array with shape 1 x H x W x C

        Inputs:
        - filename: String path of the image file to read
        - img_type: 'gray' or 'color'. A 'gray' image gets both a leading batch axis and a
                    trailing channel axis; a 'color' image only gets the leading batch axis

        Returns:
        - Numpy array of dtype self.data_type holding the image data
        """
        assert img_type == 'gray' or img_type == 'color', "img_type has to be 'gray' or 'color'"
        # The context manager releases the underlying file even if the conversion fails.
        with Image.open(filename) as img:
            data = np.array(img, dtype=self.data_type)
        if img_type == 'gray':
            data = data.reshape((1, *data.shape, 1))
        elif img_type == 'color':
            data = data.reshape((1, *data.shape))
        return data

    def imgs_to_batch(self, gray_filelist, color_filelist=None):
        """
        Load a list of image files into numpy arrays with shape B x H x W x C, where B is the
        number of gray image files given

        Inputs:
        - gray_filelist: List of paths of the gray images
        - color_filelist: List of paths of the matching color images (same order as
                          gray_filelist), or None if there are no color images

        Returns:
        - gray_batch: Array with shape B x img_dim x img_dim x 1
        - color_batch: Array with shape B x img_dim x img_dim x color_channels, or None if
                       color_filelist is None
        """
        B = len(gray_filelist)
        # Allocate with the requested dtype so the batch honours data_type instead of
        # silently falling back to numpy's float64 default.
        gray_batch = np.zeros((B, self.img_dim, self.img_dim, 1), dtype=self.data_type)
        for i, gray_file in enumerate(gray_filelist):
            gray_batch[i, :, :, :] = self.img_to_np(gray_file, 'gray')
        if color_filelist is None:
            return gray_batch, None
        color_batch = np.zeros((B, self.img_dim, self.img_dim, self.color_channels), dtype=self.data_type)
        # Indexed by the gray count on purpose: a color list shorter than the gray list
        # fails loudly instead of leaving zero-filled images in the batch.
        for i in range(B):
            color_batch[i, :, :, :] = self.img_to_np(color_filelist[i], 'color')
        return gray_batch, color_batch

    def __iter__(self):
        """
        Generate the batches of gray image data and color image data for one epoch

        When shuffle is set, self.gray_img_names is reshuffled in place before the batches are
        built. The last batch is smaller than batch_size when the number of images is not a
        multiple of it.

        Returns:
        - Iterator over (gray_batch, color_batch) tuples as produced by imgs_to_batch
        """
        N = len(self.gray_img_names)
        B = self.batch_size
        if self.shuffle:
            random.shuffle(self.gray_img_names)
        # os.path.join works whether or not the directory strings end with a separator.
        gray_files = [os.path.join(self.gray_dir, img_name) for img_name in self.gray_img_names]
        if self.color_dir is None:
            return (self.imgs_to_batch(gray_files[i:i + B], None) for i in range(0, N, B))
        # A color image shares its gray image's name minus the 5-character prefix 'gray_'.
        color_files = [os.path.join(self.color_dir, img_name[5:]) for img_name in self.gray_img_names]
        return (self.imgs_to_batch(gray_files[i:i + B], color_files[i:i + B]) for i in range(0, N, B))