-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataload.py
More file actions
176 lines (125 loc) · 5.26 KB
/
dataload.py
File metadata and controls
176 lines (125 loc) · 5.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from torch.utils.data import Dataset
import os
import random
from torchvision.io import read_image
import torch
from torchvision import transforms
import math
class MaskedFaceDataset(Dataset):
    """Abstract base dataset over a directory of per-identity image folders.

    ``path`` is expected to end with a path separator; each sub-folder under
    it is one identity, and only identities holding at least two images are
    kept (so an image pair can always be sampled).  Subclasses must implement
    ``__len__`` and ``__getitem__``.
    """

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244):
        # Keep only identities with >= 2 images, so a pair can be drawn.
        def has_enough_images(folder):
            return len(os.listdir(path + folder + '/')) >= 2

        self.id_list = list(filter(has_enough_images, os.listdir(path)))
        self.path = path
        self.mean = torch.Tensor([0.5360, 0.4703, 0.4324])  # Dataset mean
        self.std = torch.Tensor([0.2720, 0.2469, 0.2537])  # Dataset std
        self.height = height
        self.width = width

    def __len__(self):
        # Abstract: subclasses define the epoch length.  Raising here gives a
        # clear error instead of `len()` choking on an implicit None.
        raise NotImplementedError

    def __getitem__(self,
                    index: int):
        # Abstract: subclasses define the sampling strategy.
        raise NotImplementedError

    def transformation(self,
                       img: torch.Tensor) -> torch.Tensor:
        """Normalize *img* with the dataset statistics, then resize it to
        (height, width)."""
        normalize = transforms.Normalize(self.mean, self.std)
        resize = transforms.Resize((self.height, self.width))
        transform = transforms.Compose([normalize, resize])
        return transform(img)
# Data set used for training the network
class MaskedFaceDatasetTraining(MaskedFaceDataset):
    """Training dataset: yields one random image pair per identity per epoch."""

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244):
        super().__init__(path, height, width)

    def __len__(self) -> int:
        # One sample per identity.
        return len(self.id_list)

    def __getitem__(self,
                    index: int) -> tuple[torch.Tensor, torch.Tensor]:
        # Pick the identity folder, then two distinct images from it.
        identity = self.id_list[index]
        folder = self.path + identity + '/'
        first, second = random.sample(os.listdir(folder), 2)
        img_a = read_image(folder + first).float() / 255
        img_b = read_image(folder + second).float() / 255
        return self.transformation(img_a), self.transformation(img_b)
# Dataset used at inference time using the k-nn classifier
class MaskedFaceDatasetInference(MaskedFaceDataset):
    """Inference dataset for the k-NN classifier: yields (image, identity).

    Parameters
    ----------
    path : str
        Root directory (must end with a path separator).
    height, width : int
        Target image size.
    id_list : list, optional
        Explicit identity list.  When None, identities are discovered from
        the folders under ``path``; passing the same list to the train and
        test datasets guarantees both use identical identities.
    """

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244,
                 id_list: list = None):
        super().__init__(path, height, width)
        self.img_list = []
        self.label = []
        # `is not None` so that an explicitly passed (possibly empty) list is
        # honoured instead of being silently replaced by the folder scan.
        if id_list is not None:
            self.id_list = id_list
        # Flatten: one entry per image, labelled with its identity.
        for identity in self.id_list:
            dir_path = self.path + identity + '/'
            for img_name in os.listdir(dir_path):
                self.img_list.append(dir_path + img_name)
                self.label.append(identity)

    def __len__(self) -> int:
        return len(self.label)

    def __getitem__(self,
                    index: int) -> tuple[torch.Tensor, str]:
        # The folder name doubles as the class label.
        identity = self.label[index]
        img = read_image(self.img_list[index]).float() / 255
        return self.transformation(img), identity
class MaskedFaceDatasetNewSampler(Dataset):
    '''
    Dataset that exploits all images of every identity within one epoch.

    The first version sampled a single image pair per identity per epoch;
    here each identity appears floor(n_img / 2) times in the index space, so
    one epoch draws every disjoint image pair that can be obtained sampling
    from the folder without replacement.  Sampling history is tracked in
    ``old_sample``: the caller MUST invoke ``empty_dict()`` at the start of
    every epoch, otherwise the pool of fresh images is exhausted and
    ``random.sample`` raises ValueError.
    '''

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244):
        self.id_list = []
        self.id_listdir = os.listdir(path)
        for identity in self.id_listdir:
            n_img = len(os.listdir(path + identity + '/'))
            # One index entry per disjoint pair available for this identity.
            self.id_list.extend([identity] * (n_img // 2))
        self.path = path
        self.mean = torch.Tensor([0.5360, 0.4703, 0.4324])  # Dataset mean
        self.std = torch.Tensor([0.2720, 0.2469, 0.2537])  # Dataset std
        self.height = height
        self.width = width
        self.empty_dict()

    def __len__(self) -> int:
        return len(self.id_list)

    def __getitem__(self,
                    index: int) -> tuple[torch.Tensor, torch.Tensor]:
        identity = self.id_list[index]
        dir_path = self.path + identity + '/'
        # Exclude images already used for this identity in the current epoch.
        remaining = list(set(os.listdir(dir_path)) - set(self.old_sample[identity]))
        img_sampled = random.sample(remaining, 2)
        self.old_sample[identity].extend(img_sampled)
        img_1 = read_image(dir_path + img_sampled[0]).float() / 255
        img_2 = read_image(dir_path + img_sampled[1]).float() / 255
        return self.transformation(img_1), self.transformation(img_2)

    def transformation(self,
                       img: torch.Tensor) -> torch.Tensor:
        """Normalize *img* with the dataset statistics, then resize it."""
        normalize = transforms.Normalize(self.mean, self.std)
        resize = transforms.Resize((self.height, self.width))
        transform = transforms.Compose([normalize, resize])
        return transform(img)

    def empty_dict(self) -> None:
        """Reset the per-identity sampling history; call once per epoch."""
        self.old_sample = {identity: [] for identity in self.id_listdir}