|
#!/usr/bin/env python
# ------------------------------------------------------------------------------------------------------%
# Created by "Thieu Nguyen" at 00:51, 29/03/2020                                                        %
#                                                                                                       %
# Homepage: https://www.researchgate.net/profile/Thieu_Nguyen6                                          %
# Github: https://github.com/thieunguyen5991                                                            %
# ------------------------------------------------------------------------------------------------------%
| 9 | + |
1 | 10 | import numpy as np |
2 | | -from math import floor |
3 | 11 | from copy import deepcopy |
4 | | -import pandas as pd |
5 | | -import matplotlib.pyplot as plt |
6 | | - |
class CheckingData(object):
    """
    Check whether a time series is stationary or non-stationary (trend, seasonality, ...).

    References:
        https://machinelearningmastery.com/time-series-data-stationary-python/
        https://machinelearningmastery.com/difference-time-series-dataset-python/
    """

    def __init__(self, pathfile=None):
        # pd.Series.from_csv was deprecated in pandas 0.21 and removed in 1.0;
        # read the single-column CSV and squeeze the DataFrame down to a Series.
        self.series = pd.read_csv(pathfile, header=0, index_col=0).squeeze("columns")

    def check_by_plot_raw_data(self):
        """Plot the raw series; a visible trend/seasonality suggests non-stationarity."""
        self.series.plot()
        plt.show()

    def check_by_summary_statistic(self, draw_history=True):
        """
        Split the series into two halves and compare the mean and variance of each.
        If they differ and the difference is statistically significant, the series
        is likely non-stationary.

        Because we look at mean and variance, we assume the data roughly follows a
        Gaussian (bell-curve / normal) distribution.

        :param draw_history: when True, also draw a histogram of the series.
        """
        X = self.series.values
        split = int(len(X) / 2)
        X1, X2 = X[0:split], X[split:]
        mean1, mean2 = X1.mean(), X2.mean()
        var1, var2 = X1.var(), X2.var()

        print('mean1=%f, mean2=%f' % (mean1, mean2))
        print('variance1=%f, variance2=%f' % (var1, var2))

        # Fix: the original called self.series.hist() twice (once unconditionally
        # and once behind the flag); draw the histogram only when requested.
        if draw_history:
            self.series.hist()
            plt.show()
41 | 13 |
|
42 | | - |
43 | | -class TimeSeries(object): |
44 | | - def __init__(self, dataset=None, data_idx=None, sliding=None, output_index=None, |
45 | | - method_statistic=0, minmax_scaler=None): |
| 14 | +class TimeSeries: |
| 15 | + def __init__(self, dataset=None, data_idx=None, sliding=None, output_index=None, method_statistic=0, minmax_scaler=None): |
46 | 16 | ''' |
47 | 17 | :param data_idx: |
48 | 18 | :param sliding: |
@@ -199,56 +169,3 @@ def _preprocessing_3d__(self): |
199 | 169 | # y_valid = y_valid.flatten() |
200 | 170 | # y_test = y_test.flatten() |
201 | 171 | return X_train, y_train, X_valid, y_valid, X_test, y_test, self.minmax_scaler |
202 | | - |
203 | | - |
204 | | - def __difference__(self, interval = 1): |
205 | | - """ |
206 | | - :param interval: https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/ |
207 | | - :return: |
208 | | - """ |
209 | | - diff = list() |
210 | | - for i in range(interval, len(self.original_dataset)): |
211 | | - value = self.original_dataset[i] - self.original_dataset[i - interval] |
212 | | - diff.append(value) |
213 | | - #return Series(diff) |
214 | | - |
215 | | - # invert differenced value |
216 | | - def __inverse_difference__(self, history, yhat, interval=1): |
217 | | - return yhat + history[-interval] |
218 | | - |
219 | | - |
class MiniBatch(object):
    """
    Split a training set into shuffled mini-batches.

    :param X_train: array of shape (n_examples, n_features).
    :param y_train: array of shape (n_examples, n_outputs).
    :param batch_size: number of examples per mini-batch.
    """

    def __init__(self, X_train, y_train, batch_size):
        self.X_train = X_train
        self.y_train = y_train
        self.batch_size = batch_size

    def random_mini_batches(self, seed=None):
        """
        Build a list of (mini_batch_X, mini_batch_Y) tuples in column-major
        layout (features x examples), with X and Y shuffled by the same
        permutation so examples stay aligned.

        :param seed: optional seed for np.random so shuffles are reproducible.
        :return: list of (X_batch, Y_batch); the last batch may be smaller
                 when the number of examples is not a multiple of batch_size.
        """
        X, Y = self.X_train.T, self.y_train.T
        mini_batch_size = self.batch_size

        m = X.shape[1]  # number of training examples
        mini_batches = []
        np.random.seed(seed)

        # Step 1: shuffle the columns of X and Y with a single shared permutation.
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))

        # Step 2: partition into full-size batches. Integer floor division
        # replaces the former math.floor(m / mini_batch_size) on a float
        # quotient (the `from math import floor` dependency is no longer needed).
        num_complete_minibatches = m // mini_batch_size
        for k in range(num_complete_minibatches):
            start = k * mini_batch_size
            end = start + mini_batch_size
            mini_batches.append((shuffled_X[:, start:end], shuffled_Y[:, start:end]))

        # Handle the trailing partial batch, if any.
        if m % mini_batch_size != 0:
            start = num_complete_minibatches * mini_batch_size
            mini_batches.append((shuffled_X[:, start:], shuffled_Y[:, start:]))

        return mini_batches
0 commit comments