-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathdata.py
More file actions
131 lines (111 loc) · 4.27 KB
/
data.py
File metadata and controls
131 lines (111 loc) · 4.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import os
import numpy as np
from config import BATCH_SIZE, FRAMES_NO, SIZE
from fgn_data_transformation import (color_jitter, normalize,
normalize_respectively, random_flip,
uniform_sampling, video_2_npy)
from keras.utils import Sequence, np_utils
class DataGenerator(Sequence):
    """
    Batch generator of video samples for Keras ``fit`` / ``fit_generator``.

    The data directory is expected to contain one sub-folder per class; every
    regular file inside a class folder is treated as one sample of that class.

    Args:
        directory: path to the data directory
        batch_size: number of samples per batch
        shuffle: shuffle the sample order (once at construction and again at
            the end of every epoch)
        data_augmentation: apply colour jitter and random flip to each sample
    """

    def __init__(self, directory, batch_size=BATCH_SIZE, shuffle=True, data_augmentation=True):
        super().__init__()
        # initialize the params
        self.directory = directory
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.data_aug = data_augmentation
        # class sub-folders; stray files in the root (README, .DS_Store, ...)
        # are ignored so they neither crash the scan nor count as a class
        self.dirs = sorted(
            entry for entry in os.listdir(self.directory)
            if os.path.isdir(os.path.join(self.directory, entry))
        )
        # gets data
        self.X_path, self.Y_dict = self.search_data()
        # basic information
        self.n_files = len(self.X_path)
        self.n_classes = len(self.dirs)
        self.indexes = np.arange(len(self.X_path))
        # X_path is grouped by class, so without an initial shuffle the first
        # epoch would be served in class order (nearly single-class batches)
        self.on_epoch_end()

    def search_data(self):
        """
        Collect the path of every sample and map each path to its label.

        Returns:
            list of file paths and dictionary mapping "path: one-hot label"
        """
        x_path = []
        y_dict = {}
        # one-hot row i belongs to class folder i; identical to
        # to_categorical(range(n)) but also valid when there are no classes
        categorical = np.eye(len(self.dirs), dtype=np.float32)
        for i, folder in enumerate(self.dirs):
            folder_path = os.path.join(self.directory, folder)
            # sorted so that the index -> file mapping is reproducible
            for file in sorted(os.listdir(folder_path)):
                file_path = os.path.join(folder_path, file)
                if not os.path.isfile(file_path):
                    # nested directories are not samples
                    continue
                # append each file path, and keep its label
                x_path.append(file_path)
                y_dict[file_path] = categorical[i]
        return x_path, y_dict

    def __len__(self):
        """
        Calculate the number of iterations of each epoch.

        Returns:
            number of batches per epoch (the last batch may be smaller)
        """
        steps_per_epoch = np.ceil(len(self.X_path) / float(self.batch_size))
        return int(steps_per_epoch)

    def __getitem__(self, index):
        """
        Gets one batch of data.

        Args:
            index: position of the batch within the epoch
        Returns:
            tuple of (batch data, batch labels)
        """
        # get the indexes of the samples belonging to this batch
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        # use the indexes to get the paths of the current batch
        batch_path = [self.X_path[k] for k in batch_indexes]
        # get batch data
        return self.data_generation(batch_path)

    def on_epoch_end(self):
        """
        Shuffle the sample order in place when shuffling is enabled.
        """
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def data_generation(self, batch_path):
        """
        Generates batch data.

        Args:
            batch_path: paths of the samples in the current batch
        Returns:
            array of samples and array of one-hot labels
        """
        # load data into memory, you can change load_data to any method you want
        batch_x = np.array([self.load_data(x) for x in batch_path])
        # labels were one-hot encoded once in search_data; just look them up
        batch_y = np.array([self.Y_dict[x] for x in batch_path])
        return batch_x, batch_y

    def load_data(self, path):
        """
        Load one video and transform it to the format fed to the network.

        Args:
            path: path of the video
        Returns:
            transformed data
        """
        # load np array with 5 channels (1-3 for RGB, 4-5 for optical flows)
        data = video_2_npy(file_path=path)
        data = np.float32(data)
        data = uniform_sampling(video=data, target_frames=FRAMES_NO)
        # whether to utilize the data augmentation
        if self.data_aug:
            # colour jitter is applied to the first three channels only
            data[..., :3] = color_jitter(data[..., :3])
            data = random_flip(data, prob=0.5)
        # normalize rgb images and optical flows, respectively
        data = normalize_respectively(data)
        return data