|
| 1 | +from datetime import datetime, timedelta |
| 2 | +from api import get_need_data |
| 3 | +import time |
| 4 | + |
| 5 | +import math |
| 6 | +import numpy as np |
| 7 | +import pandas as pd |
| 8 | +from sklearn.preprocessing import MinMaxScaler |
| 9 | +from keras.models import Sequential |
| 10 | +from keras.layers import Dense, LSTM |
| 11 | +import matplotlib.pyplot as plt |
| 12 | + |
plt.style.use('fivethirtyeight')  # global matplotlib theme for any pyplot output
| 14 | + |
| 15 | +import plotly.graph_objects as go |
| 16 | +from plotly.subplots import make_subplots |
| 17 | + |
# Historical window the model trains on; predictions are appended starting
# at END_TIME (see simulation()).
START_TIME = datetime(2020, 1, 1)
END_TIME = datetime(2020, 5, 31)

# Supported trading pairs. Callers pass a 1-based index into this list
# (plot() looks up pairs[pair - 1]).
pairs = ['BTC USD', 'ETH USD', 'LTC USD']
| 22 | + |
| 23 | + |
def get_data(time_from, time_to, pair):
    """Fetch raw candle data for *pair* between the two timestamps.

    Each candle gets an extra ``diff`` column — the open-to-close change
    expressed as a percentage of the open price.  The ``quoteVolume`` and
    ``weightedAverage`` columns returned by the API are discarded.

    Returns a pandas DataFrame of the remaining columns.
    """
    candles = get_need_data(time_from, time_to, pair)

    for candle in candles:
        candle['diff'] = (candle['open'] - candle['close']) / candle['open'] * 100

    frame = pd.DataFrame(candles)
    return frame.drop(columns=['quoteVolume', 'weightedAverage'])
| 34 | + |
| 35 | + |
def simulation(days, time_from, time_to, pair, iterations):
    """Train an LSTM on historical candles and plot *days* predicted candles.

    Fetches data for *pair* between *time_from* and *time_to* (UNIX
    timestamps), trains a 2-layer LSTM to predict the next
    (high, low, open, close, volume) row from a sliding window, then rolls
    the model forward *days* steps and hands the combined
    historical + predicted frame to plot().

    NOTE(review): *iterations* is accepted but never used in this body —
    confirm whether it was meant to drive epochs or a Monte-Carlo loop.
    """
    df = get_data_frame(time_from, time_to, pair)
    # NOTE(review): df2 is an identical second fetch of the same data;
    # df.copy() would avoid the duplicate API call.
    df2 = get_data_frame(time_from, time_to, pair)
    dataset2 = df2.values
    # Feature matrix used for training/prediction (time and diff excluded).
    data = df.filter(['high', 'low', 'open', 'close', 'volume'])
    dataset = data.values

    # Timestamp assigned to the first predicted row; incremented per day.
    # NOTE(review): the first prediction is stamped at END_TIME itself, so it
    # may overlap the last historical candle — confirm intended.
    day = END_TIME

    # Scale all 5 features into [0, 1]; the same fitted scaler is used to
    # invert the predictions back to price space below.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)

    train_data = scaled_data.copy()
    x_train = []
    y_train = []

    # Build sliding-window samples: each x is a window of rows, y the row
    # immediately after it.
    if len(train_data) >= 100:
        # Enough history: fixed 60-step lookback.
        for i in range(60, len(train_data)):
            x_train.append(train_data[i - 60:i, :])
            y_train.append(train_data[i, :])
    else:
        # Short history: shrink the lookback proportionally (ceil(len/6)).
        # NOTE(review): prediction below always feeds the last 60 rows, so a
        # model trained with this shorter window gets a mismatched input
        # length — verify this branch against the predict loop.
        helper = math.ceil((len(train_data) / 60) * 10)
        for i in range(helper, len(train_data)):
            x_train.append(train_data[i - helper:i, :])
            y_train.append(train_data[i, :])

    x_train, y_train = np.array(x_train), np.array(y_train)

    # Stacked LSTM regressor: 5 outputs, one per scaled feature.
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(25))
    model.add(Dense(5))

    model.compile(optimizer='adam', loss='mean_squared_error')

    # Single epoch, online updates (batch_size=1).
    model.fit(x_train, y_train, batch_size=1, epochs=1)

    # Seed the rollout with the most recent 60 rows, scaled like the
    # training data.
    last_days = df[-60:].filter(['high', 'low', 'open', 'close', 'volume'])
    last_days = last_days.values
    last_days_scaled = scaler.transform(last_days)
    x_test = []
    x_test.append(last_days_scaled)
    x_test = np.array(x_test)
    result = []

    # Autoregressive rollout: predict one row, slide the window forward by
    # dropping its oldest row and appending the prediction.
    for i in range(days):
        price = model.predict(x_test, use_multiprocessing=True)
        price = list(price)
        arr = []

        # Copy the current window row by row.
        for j in range(len(x_test[0])):
            sth = x_test[0][j]
            sth = list(sth)
            arr.append(sth)

        arr.append(price[0])
        result.append(price[0])
        arr = list(arr)
        del arr[0]  # drop the oldest row to keep the window length fixed

        arr = np.array(arr)
        arr = np.reshape(arr, (1, arr.shape[0], arr.shape[1]))

        x_test = arr

    # Back to real price/volume units.
    result = scaler.inverse_transform(result)

    dataset2 = list(dataset2)

    # Append each predicted row as [timestamp, h, l, o, c, v, diff%] so it
    # matches the historical rows' column layout.
    for i in range(len(result)):
        sth = []
        sth.append(datetime.timestamp(day))
        for j in range(len(result[0])):
            sth.append(result[i][j])
        # diff from predicted open (index 2) and close (index 3).
        sth.append(((result[i][2] - result[i][3]) / result[i][2] * 100))
        sth = np.array(sth)
        dataset2.append(sth)
        day += timedelta(days=1)

    # NOTE(review): assumes get_data_frame() yields columns in exactly this
    # order — verify against the API response.
    new_df = pd.DataFrame(dataset2, columns=['time', 'high', 'low', 'open', 'close', 'volume', 'diff'])

    plot(new_df, pair)
| 119 | + |
| 120 | + |
def plot(df, pair):
    """Show candlestick, percentage-change and volume subplots for *df*.

    Mutates *df* in place: the 'time' column is converted from UNIX
    timestamps to datetime objects.  *pair* is a 1-based index into the
    module-level ``pairs`` list.  Opens the figure in the default plotly
    renderer.
    """
    df['time'] = df['time'].apply(lambda ts: datetime.fromtimestamp(ts))
    print(df)

    figure = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Stock prices', 'Percentage change', 'Volume'),
    )

    candles = go.Candlestick(
        name='Stock prices',
        x=df['time'],
        open=df['open'],
        high=df['high'],
        low=df['low'],
        close=df['close'],
    )
    figure.add_trace(candles, row=1, col=1)

    change_bars = go.Bar(name='Percentage change', x=df['time'], y=df['diff'])
    figure.add_trace(change_bars, row=1, col=2)

    volume_bars = go.Bar(name='Volume', x=df['time'], y=df['volume'])
    figure.add_trace(volume_bars, row=2, col=1)

    # Vertical marker separating historical candles from predicted ones.
    line_date = END_TIME - timedelta(hours=12)
    figure.update_layout(
        title_text=f'{pairs[pair-1]} predictions',
        shapes=[dict(
            x0=line_date, x1=line_date, y0=0, y1=1,
            xref='x', yref='paper', line_width=2,
        )],
    )

    figure.show()
| 162 | + |
| 163 | + |
def get_data_frame(time_from, time_to, pair):
    """Return the candle DataFrame for a validated time range.

    Raises ValueError when *time_from* lies in the future or is not
    strictly before *time_to*.
    """
    range_is_valid = time_from < time.time() and time_from < time_to
    if not range_is_valid:
        raise ValueError("Wrong date range")

    return get_data(time_from, time_to, pair)
| 170 | + |
| 171 | + |
if __name__ == '__main__':
    # Train on START_TIME..END_TIME for pair index 1 and predict 20 days.
    # NOTE(review): the final argument (100) maps to simulation's unused
    # `iterations` parameter — confirm intent.
    simulation(20, datetime.timestamp(START_TIME), datetime.timestamp(END_TIME), 1, 100)
0 commit comments