|
#!/usr/bin/env python
# ------------------------------------------------------------------------------------------------------%
# Created by "Thieu Nguyen" at 00:51, 29/03/2020                                                        %
#                                                                                                       %
# Homepage: https://www.researchgate.net/profile/Thieu_Nguyen6                                          %
# Github: https://github.com/thieunguyen5991                                                            %
# ------------------------------------------------------------------------------------------------------%
| 9 | + |
1 | 10 | import numpy as np |
2 | | -from math import floor |
3 | 11 | from copy import deepcopy |
4 | | -import pandas as pd |
5 | | -import matplotlib.pyplot as plt |
6 | | - |
class CheckingData(object):
    """
    Check whether a time series is stationary or non-stationary (trend, seasonality, ...).

    References:
        https://machinelearningmastery.com/time-series-data-stationary-python/
        https://machinelearningmastery.com/difference-time-series-dataset-python/
    """

    def __init__(self, pathfile=None):
        # pd.Series.from_csv was deprecated in pandas 0.21 and removed in 1.0;
        # read the single-column CSV and squeeze the DataFrame down to a Series.
        self.series = pd.read_csv(pathfile, header=0, index_col=0).squeeze("columns")

    def check_by_plot_raw_data(self):
        """Plot the raw series; a visible trend/seasonality suggests non-stationarity."""
        self.series.plot()
        plt.show()

    def check_by_summary_statistic(self, draw_history=True):
        """
        Split the series into two halves and compare the mean and variance of each.
        If they differ and the difference is statistically significant, the series
        is likely non-stationary.

        Because we look at mean and variance, we assume the data roughly follows a
        Gaussian (bell-curve / normal) distribution.

        :param draw_history: when True, also draw a histogram of the series.
        """
        X = self.series.values
        split = int(len(X) / 2)
        X1, X2 = X[0:split], X[split:]
        mean1, mean2 = X1.mean(), X2.mean()
        var1, var2 = X1.var(), X2.var()

        print('mean1=%f, mean2=%f' % (mean1, mean2))
        print('variance1=%f, variance2=%f' % (var1, var2))

        # Fix: the original called self.series.hist() twice (once unconditionally
        # and once behind the flag); draw the histogram only when requested.
        if draw_history:
            self.series.hist()
            plt.show()
41 | 13 |
|
42 | | - |
43 | | -class TimeSeries(object): |
44 | | - def __init__(self, dataset=None, data_idx=None, sliding=None, output_index=None, |
45 | | - method_statistic=0, minmax_scaler=None): |
| 14 | +class TimeSeries: |
| 15 | + def __init__(self, dataset=None, data_idx=None, sliding=None, output_index=None, method_statistic=0, minmax_scaler=None): |
46 | 16 | ''' |
47 | 17 | :param data_idx: |
48 | 18 | :param sliding: |
@@ -199,56 +169,3 @@ def _preprocessing_3d__(self): |
199 | 169 | # y_valid = y_valid.flatten() |
200 | 170 | # y_test = y_test.flatten() |
201 | 171 | return X_train, y_train, X_valid, y_valid, X_test, y_test, self.minmax_scaler |
202 | | - |
203 | | - |
204 | | - def __difference__(self, interval = 1): |
205 | | - """ |
206 | | - :param interval: https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/ |
207 | | - :return: |
208 | | - """ |
209 | | - diff = list() |
210 | | - for i in range(interval, len(self.original_dataset)): |
211 | | - value = self.original_dataset[i] - self.original_dataset[i - interval] |
212 | | - diff.append(value) |
213 | | - #return Series(diff) |
214 | | - |
215 | | - # invert differenced value |
216 | | - def __inverse_difference__(self, history, yhat, interval=1): |
217 | | - return yhat + history[-interval] |
218 | | - |
219 | | - |
class MiniBatch(object):
    """
    Split a training set into shuffled mini-batches.

    :param X_train: array of shape (n_examples, n_features).
    :param y_train: array of shape (n_examples, n_outputs).
    :param batch_size: number of examples per mini-batch.
    """

    def __init__(self, X_train, y_train, batch_size):
        self.X_train = X_train
        self.y_train = y_train
        self.batch_size = batch_size

    def random_mini_batches(self, seed=None):
        """
        Build a list of (mini_batch_X, mini_batch_Y) tuples in column-major
        layout (features x examples), with X and Y shuffled by the same
        permutation so examples stay aligned.

        :param seed: optional seed for np.random so shuffles are reproducible.
        :return: list of (X_batch, Y_batch); the last batch may be smaller
                 when the number of examples is not a multiple of batch_size.
        """
        X, Y = self.X_train.T, self.y_train.T
        mini_batch_size = self.batch_size

        m = X.shape[1]  # number of training examples
        mini_batches = []
        np.random.seed(seed)

        # Step 1: shuffle the columns of X and Y with a single shared permutation.
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))

        # Step 2: partition into full-size batches. Integer floor division
        # replaces the former math.floor(m / mini_batch_size) on a float
        # quotient (the `from math import floor` dependency is no longer needed).
        num_complete_minibatches = m // mini_batch_size
        for k in range(num_complete_minibatches):
            start = k * mini_batch_size
            end = start + mini_batch_size
            mini_batches.append((shuffled_X[:, start:end], shuffled_Y[:, start:end]))

        # Handle the trailing partial batch, if any.
        if m % mini_batch_size != 0:
            start = num_complete_minibatches * mini_batch_size
            mini_batches.append((shuffled_X[:, start:], shuffled_Y[:, start:]))

        return mini_batches
0 commit comments