Skip to content

Commit d32c025

Browse files
committed
Small changes
1 parent 31f82c6 commit d32c025

File tree

2 files changed

+74
-91
lines changed

2 files changed

+74
-91
lines changed

mlprimitives/custom/timeseries_preprocessing.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def cutoff_window_sequences(X, timeseries, window_size, cutoff_time=None, time_i
213213
timeseries (pandas.DataFrame):
214214
``pandas.DataFrame`` containing the actual timeseries data. The time index
215215
can either be set as the DataFrame index or as a column.
216-
window_size (int):
216+
window_size (int, str or Timedelta):
217217
Number of elements to take before the cutoff time for each sequence.
218218
cutoff_time (str):
219219
Optional. If given, the indicated column will be used as the cutoff time.
@@ -245,16 +245,20 @@ def cutoff_window_sequences(X, timeseries, window_size, cutoff_time=None, time_i
245245
selected = timeseries[timeseries.index < row.Index]
246246

247247
mask = [True] * len(selected)
248+
248249
for column in columns:
249250
mask &= selected.pop(column) == getattr(row, column)
250251

251252
selected = selected[mask]
253+
252254
if not isinstance(window_size, int):
253255
min_time = selected.index[-1] - window_size
254256
selected = selected.loc[selected.index > min_time]
255257
else:
256258
selected = selected.iloc[-window_size:]
259+
257260
len_selected = len(selected)
261+
258262
if (len_selected != window_size):
259263
warnings.warn((
260264
'Sequence shorter than window_size found: {} < {}. '

tests/custom/test_timeseries_preprocessing.py

Lines changed: 69 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ def setUp(self):
260260
'id': [1] * 10 + [2] * 10
261261
}).set_index('timestamp')
262262

263+
"""Passing cutoff_time. The indicated column will be used as the cutoff time."""
263264
def test_cutoff_time_column(self):
264265
# setup
265266
timeseries = self.timeseries
@@ -274,14 +275,18 @@ def test_cutoff_time_column(self):
274275
)
275276

276277
# assert
277-
expected_array = np.array([[[2, 22],
278-
[3, 23],
279-
[4, 24]],
280-
[[14, 34],
281-
[15, 35],
282-
[16, 36]]])
278+
expected_array = np.array([
279+
[[2, 22],
280+
[3, 23],
281+
[4, 24]],
282+
[[14, 34],
283+
[15, 35],
284+
[16, 36]]
285+
])
286+
283287
assert_allclose(array, expected_array)
284288

289+
"""Passing time_index. The indicated column will be used as the timeseries index."""
285290
def test_time_index_column(self):
286291
# setup
287292
X = self.X
@@ -296,14 +301,18 @@ def test_time_index_column(self):
296301
)
297302

298303
# assert
299-
expected_array = np.array([[[2, 22],
300-
[3, 23],
301-
[4, 24]],
302-
[[14, 34],
303-
[15, 35],
304-
[16, 36]]])
304+
expected_array = np.array([
305+
[[2, 22],
306+
[3, 23],
307+
[4, 24]],
308+
[[14, 34],
309+
[15, 35],
310+
[16, 36]]
311+
])
312+
305313
assert_allclose(array, expected_array)
306314

315+
"""window_size accepts integer."""
307316
def test_window_size_integer(self):
308317
# setup
309318
X = self.X
@@ -317,14 +326,18 @@ def test_window_size_integer(self):
317326
)
318327

319328
# assert
320-
expected_array = np.array([[[2, 22],
321-
[3, 23],
322-
[4, 24]],
323-
[[14, 34],
324-
[15, 35],
325-
[16, 36]]])
329+
expected_array = np.array([
330+
[[2, 22],
331+
[3, 23],
332+
[4, 24]],
333+
[[14, 34],
334+
[15, 35],
335+
[16, 36]]
336+
])
337+
326338
assert_allclose(array, expected_array)
327339

340+
"""window_size accepts string."""
328341
def test_window_size_string(self):
329342
# setup
330343
X = self.X
@@ -338,14 +351,18 @@ def test_window_size_string(self):
338351
)
339352

340353
# assert
341-
expected_array = np.array([[[2, 22],
342-
[3, 23],
343-
[4, 24]],
344-
[[14, 34],
345-
[15, 35],
346-
[16, 36]]])
354+
expected_array = np.array([
355+
[[2, 22],
356+
[3, 23],
357+
[4, 24]],
358+
[[14, 34],
359+
[15, 35],
360+
[16, 36]]
361+
])
362+
347363
assert_allclose(array, expected_array)
348364

365+
"""window_size accepts Timedelta object."""
349366
def test_window_size_timedelta(self):
350367
# setup
351368
X = self.X
@@ -359,15 +376,19 @@ def test_window_size_timedelta(self):
359376
)
360377

361378
# assert
362-
expected_array = np.array([[[2, 22],
363-
[3, 23],
364-
[4, 24]],
365-
[[14, 34],
366-
[15, 35],
367-
[16, 36]]])
379+
expected_array = np.array([
380+
[[2, 22],
381+
[3, 23],
382+
[4, 24]],
383+
[[14, 34],
384+
[15, 35],
385+
[16, 36]]
386+
])
387+
368388
assert_allclose(array, expected_array)
369389

370-
def test_large_window_size(self):
390+
"""If there is not enough data for the given window_size, shape changes."""
391+
def test_not_enough_data(self):
371392
# setup
372393
X = self.X
373394
timeseries = self.timeseries
@@ -381,62 +402,35 @@ def test_large_window_size(self):
381402

382403
# assert
383404
assert len(array) == 2
384-
assert_allclose(
385-
array[0],
405+
406+
expected_array = np.array([
386407
np.array([
387408
[1, 21],
388409
[2, 22],
389410
[3, 23],
390411
[4, 24]
391-
])
392-
)
393-
assert_allclose(
394-
array[1],
412+
]),
395413
np.array([
396414
[12, 32],
397415
[13, 33],
398416
[14, 34],
399417
[15, 35],
400418
[16, 36]
401419
])
402-
)
420+
])
403421

404-
def test_window_size_zero(self):
405-
# setup
406-
X = self.X
407-
timeseries = self.timeseries
408-
409-
# run
410-
array = cutoff_window_sequences(
411-
X,
412-
timeseries,
413-
window_size=0,
414-
)
415-
416-
# assert
417-
assert len(array) == 2
418422
assert_allclose(
419423
array[0],
420-
np.array([
421-
[1, 21],
422-
[2, 22],
423-
[3, 23],
424-
[4, 24]
425-
])
424+
expected_array[0]
426425
)
426+
427427
assert_allclose(
428428
array[1],
429-
np.array([
430-
[11, 31],
431-
[12, 32],
432-
[13, 33],
433-
[14, 34],
434-
[15, 35],
435-
[16, 36]
436-
])
429+
expected_array[1]
437430
)
438431

439-
def test_not_id(self):
432+
"""Test X without any other column than cutoff_time."""
433+
def test_cutoff_time_only(self):
440434
# setup
441435
X = self.X
442436
del X['id']
@@ -451,28 +445,13 @@ def test_not_id(self):
451445
)
452446

453447
# assert
454-
expected_array = np.array([[[12, 32],
455-
[13, 33],
456-
[14, 34]],
457-
[[14, 34],
458-
[15, 35],
459-
[16, 36]]])
460-
assert_allclose(array, expected_array)
448+
expected_array = np.array([
449+
[[12, 32],
450+
[13, 33],
451+
[14, 34]],
452+
[[14, 34],
453+
[15, 35],
454+
[16, 36]]
455+
])
461456

462-
def test_not_values(self):
463-
# setup
464-
X = self.X
465-
timeseries = self.timeseries
466-
del timeseries['value1']
467-
del timeseries['value2']
468-
469-
# run
470-
array = cutoff_window_sequences(
471-
X,
472-
timeseries,
473-
window_size=3,
474-
)
475-
476-
# assert
477-
expected_array = np.array([[[], [], []], [[], [], []]])
478457
assert_allclose(array, expected_array)

0 commit comments

Comments
 (0)