Skip to content

Commit 20a00ab

Browse files
committed
refactoring
1 parent 79c2f04 commit 20a00ab

File tree

2 files changed

+66
-47
lines changed

2 files changed

+66
-47
lines changed

stock_prediction_deep_learning.py

Lines changed: 13 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,48 +18,12 @@
1818
import tensorflow as tf
1919
from sklearn.preprocessing import MinMaxScaler
2020
import datetime
21-
import numpy as np
2221
import yfinance as yf
23-
2422
from stock_prediction_lstm import LongShortTermMemory
23+
from stock_prediction_numpy import StockData
2524
from stock_prediction_plotter import Plotter
26-
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
27-
28-
29-
def data_verification(train):
30-
print('mean:', train.mean(axis=0))
31-
print('max', train.max())
32-
print('min', train.min())
33-
print('Std dev:', train.std(axis=0))
34-
35-
def load_data_transform(time_steps, min_max, training_data, test_data):
36-
train_scaled = min_max.fit_transform(training_data)
37-
data_verification(train_scaled)
3825

39-
# Training Data Transformation
40-
x_train = []
41-
y_train = []
42-
for i in range(time_steps, train_scaled.shape[0]):
43-
x_train.append(train_scaled[i - time_steps:i])
44-
y_train.append(train_scaled[i, 0])
45-
46-
x_train, y_train = np.array(x_train), np.array(y_train)
47-
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
48-
49-
total_data = pd.concat((training_data, test_data), axis=0)
50-
inputs = total_data[len(total_data) - len(test_data) - time_steps:]
51-
test_scaled = min_max.fit_transform(inputs)
52-
53-
# Testing Data Transformation
54-
x_test = []
55-
y_test = []
56-
for i in range(time_steps, test_scaled.shape[0]):
57-
x_test.append(test_scaled[i - time_steps:i])
58-
y_test.append(test_scaled[i, 0])
59-
60-
x_test, y_test = np.array(x_test), np.array(y_test)
61-
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
62-
return (x_train, y_train), (x_test, y_test)
26+
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'
6327

6428

6529
def train_LSTM_network(start_date, ticker, validation_date):
@@ -69,15 +33,16 @@ def train_LSTM_network(start_date, ticker, validation_date):
6933
data = data.reset_index()
7034
print(data)
7135

72-
plotter = Plotter(True, project_folder, sec.info['shortName'], sec.info['currency'], stock_ticker)
36+
plotter = Plotter(True, project_folder, sec.info['shortName'], sec.info['currency'], STOCK_TICKER)
7337

7438
training_data = data[data['Date'] < validation_date].copy()
7539
test_data = data[data['Date'] >= validation_date].copy()
7640
training_data = training_data.set_index('Date')
7741
test_data = test_data.set_index('Date')
7842
plotter.plot_histogram_data_split(training_data, test_data, validation_date)
7943

80-
(x_train, y_train), (x_test, y_test) = load_data_transform(60, min_max, training_data, test_data)
44+
data = StockData()
45+
(x_train, y_train), (x_test, y_test) = data.to_numpy(TIME_STEPS, min_max, training_data, test_data)
8146

8247
lstm = LongShortTermMemory(project_folder)
8348
model = lstm.create_model(x_train)
@@ -89,7 +54,7 @@ def train_LSTM_network(start_date, ticker, validation_date):
8954
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode='min', verbose=1)
9055

9156
model.compile(optimizer='adam', loss='mean_squared_error', metrics=defined_metrics)
92-
history = model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, y_test),
57+
history = model.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(x_test, y_test),
9358
callbacks=[callback])
9459
print("saving weights")
9560
model.save(os.path.join(project_folder, 'model_weights.h5'))
@@ -108,7 +73,7 @@ def train_LSTM_network(start_date, ticker, validation_date):
10873
test_predictions_baseline = pd.DataFrame(test_predictions_baseline)
10974
test_predictions_baseline.to_csv(os.path.join(project_folder, 'predictions.csv'))
11075

111-
test_predictions_baseline.rename(columns={0: stock_ticker + '_predicted'}, inplace=True)
76+
test_predictions_baseline.rename(columns={0: STOCK_TICKER + '_predicted'}, inplace=True)
11277
test_predictions_baseline = test_predictions_baseline.round(decimals=0)
11378
test_predictions_baseline.index = test_data.index
11479
plotter.project_plot_predictions(test_predictions_baseline, test_data)
@@ -117,13 +82,14 @@ def train_LSTM_network(start_date, ticker, validation_date):
11782

11883

11984
if __name__ == '__main__':
120-
stock_start_date = pd.to_datetime('2004-08-01')
121-
stock_ticker = 'GOOG'
122-
epochs = 100
123-
batch_size = 32
85+
STOCK_START_DATE = pd.to_datetime('2004-08-01')
86+
STOCK_TICKER = 'GOOG'
87+
EPOCHS = 100
88+
BATCH_SIZE = 32
89+
TIME_STEPS = 60
12490
token = secrets.token_hex(16)
12591
project_folder = os.path.join(os.getcwd(), token)
12692
if not os.path.exists(project_folder):
12793
os.makedirs(project_folder)
12894
stock_validation_date = pd.to_datetime('2017-01-01')
129-
train_LSTM_network(stock_start_date, stock_ticker, stock_validation_date)
95+
train_LSTM_network(STOCK_START_DATE, STOCK_TICKER, stock_validation_date)

stock_prediction_numpy.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright 2020 Jordi Corbilla. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ==============================================================================
15+
import numpy as np
16+
import pandas as pd
17+
18+
19+
class StockData:
    """Convert train/test price frames into windowed NumPy arrays.

    Each sample is a window of ``time_steps`` consecutive scaled rows and the
    target is column 0 of the row that immediately follows the window.
    """

    def __data_verification(self, train):
        """Print basic statistics of the scaled training array.

        Args:
            train: NumPy array returned by the scaler for the training data.
        """
        print('mean:', train.mean(axis=0))
        print('max', train.max())
        print('min', train.min())
        print('Std dev:', train.std(axis=0))

    @staticmethod
    def _to_windows(scaled, time_steps):
        """Slice a scaled 2-D array into (window, next-value) pairs.

        Args:
            scaled: 2-D NumPy array of scaled rows.
            time_steps: Number of consecutive rows in each window.

        Returns:
            Tuple ``(x, y)`` where ``x`` has shape
            ``(n_windows, time_steps, 1)`` and ``y`` holds column 0 of the row
            following each window.
        """
        ends = range(time_steps, scaled.shape[0])
        x = np.array([scaled[i - time_steps:i] for i in ends])
        y = np.array([scaled[i, 0] for i in ends])
        # Reshape with explicit sizes so that zero windows still yields a
        # well-formed (0, time_steps, 1) array instead of an IndexError.
        x = np.reshape(x, (len(ends), time_steps, 1))
        return x, y

    def to_numpy(self, time_steps, min_max, training_data, test_data):
        """Scale the data and build the windowed train and test arrays.

        The scaler is fitted on ``training_data`` only. The test window is
        scaled with ``transform`` so it shares the training scale, and the
        scaler is left fitted on the training data when this method returns.

        Args:
            time_steps: Window length; must be at least 1.
            min_max: Scaler object exposing ``fit_transform`` and
                ``transform`` (e.g. scikit-learn's ``MinMaxScaler``).
            training_data: DataFrame of training rows; must contain more
                than ``time_steps`` rows.
            test_data: DataFrame of test rows with the same columns.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` as NumPy arrays.

        Raises:
            ValueError: If ``time_steps`` is less than 1 or the training data
                does not contain more than ``time_steps`` rows.
        """
        if time_steps < 1:
            raise ValueError('time_steps must be at least 1')
        if len(training_data) <= time_steps:
            raise ValueError('training_data must contain more than time_steps rows')

        train_scaled = min_max.fit_transform(training_data)
        self.__data_verification(train_scaled)
        x_train, y_train = self._to_windows(train_scaled, time_steps)

        # Prepend the last `time_steps` training rows so the first test
        # sample has a complete look-back window.
        total_data = pd.concat((training_data, test_data), axis=0)
        first_row = len(total_data) - len(test_data) - time_steps
        inputs = total_data.iloc[first_row:]
        # transform, not fit_transform: refitting here would scale the test
        # data differently from the training data and overwrite the fit.
        test_scaled = min_max.transform(inputs)
        x_test, y_test = self._to_windows(test_scaled, time_steps)

        return (x_train, y_train), (x_test, y_test)

0 commit comments

Comments
 (0)