-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataload.py
More file actions
176 lines (125 loc) · 5.26 KB
/
dataload.py
File metadata and controls
176 lines (125 loc) · 5.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
from torch.utils.data import Dataset
import os
import random
from torchvision.io import read_image
import torch
from torchvision import transforms
import math
class MaskedFaceDataset(Dataset):
    """Abstract base dataset over a directory of per-identity image folders.

    ``path`` is expected to end with a path separator; each sub-folder under
    it is one identity, and only identities holding at least two images are
    kept (so an image pair can always be sampled).  Subclasses must implement
    ``__len__`` and ``__getitem__``.
    """

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244):
        # Keep only identities with >= 2 images, so a pair can be drawn.
        def has_enough_images(folder):
            return len(os.listdir(path + folder + '/')) >= 2

        self.id_list = list(filter(has_enough_images, os.listdir(path)))
        self.path = path
        self.mean = torch.Tensor([0.5360, 0.4703, 0.4324])  # Dataset mean
        self.std = torch.Tensor([0.2720, 0.2469, 0.2537])  # Dataset std
        self.height = height
        self.width = width

    def __len__(self):
        # Abstract: subclasses define the epoch length.  Raising here gives a
        # clear error instead of `len()` choking on an implicit None.
        raise NotImplementedError

    def __getitem__(self,
                    index: int):
        # Abstract: subclasses define the sampling strategy.
        raise NotImplementedError

    def transformation(self,
                       img: torch.Tensor) -> torch.Tensor:
        """Normalize *img* with the dataset statistics, then resize it to
        (height, width)."""
        normalize = transforms.Normalize(self.mean, self.std)
        resize = transforms.Resize((self.height, self.width))
        transform = transforms.Compose([normalize, resize])
        return transform(img)
# Data set used for training the network
class MaskedFaceDatasetTraining(MaskedFaceDataset):
    """Training dataset: yields one random image pair per identity per epoch."""

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244):
        super().__init__(path, height, width)

    def __len__(self) -> int:
        # One sample per identity.
        return len(self.id_list)

    def __getitem__(self,
                    index: int) -> tuple[torch.Tensor, torch.Tensor]:
        # Pick the identity folder, then two distinct images from it.
        identity = self.id_list[index]
        folder = self.path + identity + '/'
        first, second = random.sample(os.listdir(folder), 2)
        img_a = read_image(folder + first).float() / 255
        img_b = read_image(folder + second).float() / 255
        return self.transformation(img_a), self.transformation(img_b)
# Dataset used at inference time using the k-nn classifier
class MaskedFaceDatasetInference(MaskedFaceDataset):
    """Inference dataset for the k-NN classifier: yields (image, identity).

    Parameters
    ----------
    path : str
        Root directory (must end with a path separator).
    height, width : int
        Target image size.
    id_list : list, optional
        Explicit identity list.  When None, identities are discovered from
        the folders under ``path``; passing the same list to the train and
        test datasets guarantees both use identical identities.
    """

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244,
                 id_list: list = None):
        super().__init__(path, height, width)
        self.img_list = []
        self.label = []
        # `is not None` so that an explicitly passed (possibly empty) list is
        # honoured instead of being silently replaced by the folder scan.
        if id_list is not None:
            self.id_list = id_list
        # Flatten: one entry per image, labelled with its identity.
        for identity in self.id_list:
            dir_path = self.path + identity + '/'
            for img_name in os.listdir(dir_path):
                self.img_list.append(dir_path + img_name)
                self.label.append(identity)

    def __len__(self) -> int:
        return len(self.label)

    def __getitem__(self,
                    index: int) -> tuple[torch.Tensor, str]:
        # The folder name doubles as the class label.
        identity = self.label[index]
        img = read_image(self.img_list[index]).float() / 255
        return self.transformation(img), identity
class MaskedFaceDatasetNewSampler(Dataset):
    '''
    Dataset that exploits all images of every identity within one epoch.

    The first version sampled a single image pair per identity per epoch;
    here each identity appears floor(n_img / 2) times in the index space, so
    one epoch draws every disjoint image pair that can be obtained sampling
    from the folder without replacement.  Sampling history is tracked in
    ``old_sample``: the caller MUST invoke ``empty_dict()`` at the start of
    every epoch, otherwise the pool of fresh images is exhausted and
    ``random.sample`` raises ValueError.
    '''

    def __init__(self,
                 path: str,
                 height: int = 244,
                 width: int = 244):
        self.id_list = []
        self.id_listdir = os.listdir(path)
        for identity in self.id_listdir:
            n_img = len(os.listdir(path + identity + '/'))
            # One index entry per disjoint pair available for this identity.
            self.id_list.extend([identity] * (n_img // 2))
        self.path = path
        self.mean = torch.Tensor([0.5360, 0.4703, 0.4324])  # Dataset mean
        self.std = torch.Tensor([0.2720, 0.2469, 0.2537])  # Dataset std
        self.height = height
        self.width = width
        self.empty_dict()

    def __len__(self) -> int:
        return len(self.id_list)

    def __getitem__(self,
                    index: int) -> tuple[torch.Tensor, torch.Tensor]:
        identity = self.id_list[index]
        dir_path = self.path + identity + '/'
        # Exclude images already used for this identity in the current epoch.
        remaining = list(set(os.listdir(dir_path)) - set(self.old_sample[identity]))
        img_sampled = random.sample(remaining, 2)
        self.old_sample[identity].extend(img_sampled)
        img_1 = read_image(dir_path + img_sampled[0]).float() / 255
        img_2 = read_image(dir_path + img_sampled[1]).float() / 255
        return self.transformation(img_1), self.transformation(img_2)

    def transformation(self,
                       img: torch.Tensor) -> torch.Tensor:
        """Normalize *img* with the dataset statistics, then resize it."""
        normalize = transforms.Normalize(self.mean, self.std)
        resize = transforms.Resize((self.height, self.width))
        transform = transforms.Compose([normalize, resize])
        return transform(img)

    def empty_dict(self) -> None:
        """Reset the per-identity sampling history; call once per epoch."""
        self.old_sample = {identity: [] for identity in self.id_listdir}