-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdataset.py
More file actions
executable file
·113 lines (73 loc) · 3.39 KB
/
dataset.py
File metadata and controls
executable file
·113 lines (73 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
class SignalDataset(Dataset):
    """Dataset that slices a 2-D signal array into fixed-length sequences.

    The input array of shape ``(num_instances, num_features)`` is split
    into consecutive, non-overlapping windows of ``seqLen`` rows; trailing
    rows that do not fill a whole window are dropped. Windows are stored
    internally as a ``(seqLen, numFeatures, numSequences)`` float64 array.
    """

    def __init__(self, data, seqLen, normalize=False, means=None, stdevs=None, num_train=None):
        """
        Initializes signal dataset. Portions the given data into sequences
        of seqLen.

        Parameters
        ----------
        data : ndarray
            Input dataset of shape (num_instances, num_features).
        seqLen : int
            Length of the sequence to cut.
        normalize : bool, optional
            If True, apply online per-feature normalization in __getitem__.
        means : ndarray, optional
            Initial per-feature running means (required when normalize=True).
        stdevs : ndarray, optional
            Initial per-feature running standard deviations (required when
            normalize=True).
        num_train : int, optional
            Number of training samples already folded into means/stdevs;
            offsets the online update count (required when normalize=True).

        Returns
        -------
        None.
        """
        # Drop trailing rows that do not fill a complete window, and copy
        # so the caller's array is never mutated. float64 matches the
        # original np.zeros holder dtype.
        numInstances = data.shape[0] - (data.shape[0] % seqLen)
        data = np.array(data[:numInstances, :], dtype=np.float64)

        self.numFeatures = data.shape[1]
        self.numSequences = numInstances // seqLen
        self.normalize = normalize
        self.means = means
        self.stdevs = stdevs
        self.num_train = num_train
        self.seqLen = seqLen

        # (numSequences, seqLen, numFeatures) -> (seqLen, numFeatures, numSequences);
        # replaces the original element-copy loop with a single reshape/transpose.
        self.data = data.reshape(self.numSequences, seqLen, self.numFeatures).transpose(1, 2, 0)

    def __len__(self):
        # Number of complete windows available.
        return self.numSequences

    def normalizeData(self, data, idx):
        """Normalize ``data`` row by row while updating the running
        per-feature means/stdevs in place (Welford-style online update).

        Parameters
        ----------
        data : ndarray
            A (seqLen, numFeatures) window; modified and returned.
        idx : int
            Index of the sequence, used to offset the update count.
        """
        # NOTE(review): the update count is fixed for the whole sequence
        # rather than advancing per row -- presumably intentional, but
        # confirm against the statistics this is meant to track.
        iteration = self.num_train + idx
        for row in range(data.shape[0]):  # renamed from `idx` (shadowed the parameter)
            for i in range(self.means.shape[-1]):
                new_mean = self.means[i] + ((data[row, i] - self.means[i]) / iteration)
                variance = self.stdevs[i] ** 2
                delta = data[row, i] - new_mean
                self.stdevs[i] = np.sqrt((1 / (1 + iteration)) * (iteration * variance + delta * delta))
                self.means[i] = new_mean
            # Normalize with the stats as updated through this row.
            data[row, :] = (data[row, :] - self.means) / self.stdevs
        return data

    def __getitem__(self, idx):
        # Copy the window: the original code returned a view into
        # self.data, so online normalization wrote back into the stored
        # array and re-normalized it on every repeated access (bug fix).
        data = self.data[:, :, idx].copy()
        if self.normalize:
            data = self.normalizeData(data, idx)
        data = data.astype(np.float32)
        return torch.from_numpy(data)
def get_dataloaders(train_set, test_set, val_set, means, stdevs, num_train, normalize_online = True, seqLen = 4, batch_size=8, num_workers=8):
    """Build train/test/validation DataLoaders over SignalDataset windows.

    Parameters
    ----------
    train_set, test_set, val_set : ndarray
        Raw signal arrays of shape (num_instances, num_features).
    means, stdevs : ndarray
        Per-feature running statistics used to seed the validation set's
        online normalization.
    num_train : int
        Number of training samples already folded into means/stdevs.
    normalize_online : bool, optional
        Whether the validation split normalizes online (default True).
    seqLen : int, optional
        Window length for each sequence (default 4).
    batch_size : int, optional
        Batch size for the training loader; test/val use batch size 1.
    num_workers : int, optional
        Worker processes per DataLoader (default 8, the previous
        hard-coded value; now tunable for smaller machines).

    Returns
    -------
    tuple
        (train_loader, test_loader, val_loader).
    """
    # Only the validation split is normalized online, seeded with the
    # training statistics.
    train_dataset = SignalDataset(train_set, seqLen, normalize=False)
    test_dataset = SignalDataset(test_set, seqLen, normalize=False)
    val_dataset = SignalDataset(val_set, seqLen, normalize=normalize_online, means=means, stdevs=stdevs, num_train=num_train)

    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size,
                              pin_memory=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, shuffle=True, batch_size=1,
                             pin_memory=True, num_workers=num_workers)
    # NOTE(review): the validation loader is unshuffled (DataLoader
    # default) -- presumably so online statistics update in temporal
    # order; confirm before adding shuffle.
    val_loader = DataLoader(val_dataset, batch_size=1,
                            pin_memory=True, num_workers=num_workers)
    return train_loader, test_loader, val_loader