You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
"""Dataset which is transformed from another with a transform.
40
+
41
+
Args:
42
+
dataset (Dataset): the base dataset.
43
+
transform (callable): the transform which takes an example of the base dataset as parameter and return a new example.
44
+
"""
45
+
self._dataset=dataset
46
+
self._transform=transform
47
+
48
+
def __len__(self):
    """Return the number of examples, i.e. the length of the base dataset."""
    return len(self._dataset)
50
+
51
+
def __getitem__(self, i):
    """Return example ``i`` of the base dataset after applying the transform.

    Args:
        i: the index, passed unchanged to the base dataset's ``__getitem__``.

    Returns:
        Whatever ``self._transform`` returns for the fetched example.
    """
    in_data = self._dataset[i]
    return self._transform(in_data)
54
+
55
+
56
+
class CacheDataset(Dataset):
    """A lazy cache wrapped around a base dataset.

    An example is fetched from the base dataset the first time its index is
    requested and is kept in an in-memory dict afterwards, so later requests
    for the same index return the stored object without touching the base
    dataset again.
    """

    def __init__(self, dataset):
        """A lazy cache of the base dataset.

        Args:
            dataset (Dataset): the base dataset to cache.
        """
        self._dataset = dataset
        # Maps an index (exactly as passed to ``__getitem__``) to its example.
        # Entries are never evicted.
        self._cache = {}

    def __len__(self):
        """Return the length of the base dataset."""
        return len(self._dataset)

    def __getitem__(self, i):
        """Return example ``i``, loading it from the base dataset on a miss.

        The cache is keyed by ``i`` as given, so ``i`` must be hashable and
        equivalent indices spelled differently (e.g. ``-1`` and ``len - 1``)
        are cached as separate entries.
        """
        cache = self._cache
        if i in cache:
            return cache[i]
        example = self._dataset[i]
        cache[i] = example
        return example
73
+
74
+
75
+
classTupleDataset(Dataset):
76
+
def__init__(self, *datasets):
77
+
"""A compound dataset made from several datasets of the same length. An example of the `TupleDataset` is a tuple of examples from the constituent datasets.
78
+
79
+
Args:
80
+
datasets: tuple[Dataset], the constituent datasets.
81
+
"""
82
+
ifnotdatasets:
83
+
raiseValueError("no datasets are given")
84
+
length=len(datasets[0])
85
+
fori, datasetinenumerate(datasets):
86
+
iflen(dataset) !=length:
87
+
raiseValueError("all the datasets should have the same length."
"""A Dataset which is a slice of the base dataset.
157
+
158
+
Args:
159
+
dataset (Dataset): the base dataset.
160
+
start (int): the start of the slice.
161
+
finish (int): the end of the slice, not inclusive.
162
+
order (List[int], optional): the order, it is a permutation of the valid example ids of the base dataset. If `order` is provided, the slice is taken in `order`. Defaults to None.
163
+
"""
164
+
ifstart<0orfinish>len(dataset):
165
+
raiseValueError("subset overruns the dataset.")
166
+
self._dataset=dataset
167
+
self._start=start
168
+
self._finish=finish
169
+
self._size=finish-start
170
+
171
+
iforderisnotNoneandlen(order) !=len(dataset):
172
+
raiseValueError(
173
+
"order should have the same length as the dataset"
174
+
"len(order) = {} which does not euqals len(dataset) = {} ".
175
+
format(len(order), len(dataset)))
176
+
self._order=order
177
+
178
+
def __len__(self):
    """Return the number of examples in the slice (the stored ``_size``)."""
    return self._size
180
+
181
+
def __getitem__(self, i):
    """Return the ``i``-th example of the slice.

    Non-negative indices are offset from ``_start``; negative indices are
    offset from ``_finish``, so ``-1`` addresses the last example of the
    slice. When ``_order`` is set, the resulting position is looked up in
    ``_order`` before the base dataset is indexed.

    Args:
        i (int): index into the slice, in ``[-len(self), len(self))``.

    Returns:
        The example of the base dataset at the resolved position.

    Raises:
        IndexError: if ``i`` lies outside ``[-len(self), len(self))``.
    """
    if i >= 0:
        if i >= self._size:
            raise IndexError('dataset index out of range')
        index = self._start + i
    else:
        if i < -self._size:
            raise IndexError('dataset index out of range')
        index = self._finish + i

    # Optional indirection: positions refer to entries of the permutation.
    if self._order is not None:
        index = self._order[index]
    return self._dataset[index]
194
+
195
+
196
+
class SubsetDataset(Dataset):
    """A view of a base dataset restricted to a chosen list of indices."""

    def __init__(self, dataset, indices):
        """A Dataset which is a subset of the base dataset.

        Args:
            dataset (Dataset): the base dataset.
            indices (Sequence[int]): the indices of the examples to pick.
                It must support ``len()`` and integer indexing; it is stored
                as given, not copied.

        Raises:
            ValueError: if there are more indices than examples in the base
                dataset.
        """
        self._dataset = dataset
        if len(indices) > len(dataset):
            raise ValueError("subset's size larger that dataset's size!")
        self._indices = indices
        self._size = len(indices)

    def __len__(self):
        """Return the number of picked examples."""
        return self._size

    def __getitem__(self, i):
        """Return the base-dataset example addressed by the ``i``-th index."""
        index = self._indices[i]
        return self._dataset[index]
216
+
217
+
218
+
classFilterDataset(Dataset):
219
+
def__init__(self, dataset, filter_fn):
220
+
"""A filtered dataset.
221
+
222
+
Args:
223
+
dataset (Dataset): the base dataset.
224
+
filter_fn (callable): a callable which takes an example of the base dataset and return a boolean.
0 commit comments