Commit 30388b5 ("Add files via upload")
1 parent: b110fc3

1 file changed: RL-Bitcoin-trading-bot_1.py (156 additions, 0 deletions)
#================================================================
#
#   File name   : RL-Bitcoin-trading-bot_1.py
#   Author      : PyLessons
#   Created date: 2020-12-02
#   Website     : https://pylessons.com/
#   GitHub      : https://github.com/pythonlessons/RL-Bitcoin-trading-bot
#   Description : Introduction to trading Crypto with Reinforcement Learning
#
#================================================================
import pandas as pd
import numpy as np
import random
from collections import deque

class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self, df, initial_balance=1000, lookback_window_size=50):
        # Define action space, state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df)-1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size

        # Action space from 0 to 2: 0 is hold, 1 is buy, 2 is sell
        self.action_space = np.array([0, 1, 2])

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold and crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLC and Volume values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.state_size = (self.lookback_window_size, 10)
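
    # Each stored step holds 10 features in total: the 5 OHLCV market values plus
    # the 5 account values (balance, net_worth, crypto_bought, crypto_sold,
    # crypto_held), which is where the 10 in state_size comes from.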

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size = 0):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        if env_steps_size > 0: # used for training dataset
            self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
            self.market_history.append([self.df.loc[current_step, 'Open'],
                                        self.df.loc[current_step, 'High'],
                                        self.df.loc[current_step, 'Low'],
                                        self.df.loc[current_step, 'Close'],
                                        self.df.loc[current_step, 'Volume']
                                        ])

        state = np.concatenate((self.market_history, self.orders_history), axis=1)
        return state
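
    # With the default lookback_window_size=50, reset() returns a (50, 10) NumPy
    # array; rows run from the oldest step up to the current one, with the 5
    # market columns first (from the concatenate order above).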

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([self.df.loc[self.current_step, 'Open'],
                                    self.df.loc[self.current_step, 'High'],
                                    self.df.loc[self.current_step, 'Low'],
                                    self.df.loc[self.current_step, 'Close'],
                                    self.df.loc[self.current_step, 'Volume']
                                    ])
        obs = np.concatenate((self.market_history, self.orders_history), axis=1)
        return obs
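
    # Because market_history is a deque with maxlen=lookback_window_size, appending
    # the newest candle drops the oldest one, so the observation window slides
    # forward by one step on every call.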

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'Close'])

        if action == 0: # Hold
            pass

        elif action == 1 and self.balance > 0:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])

        # Calculate reward as the change in net worth since the previous step
        reward = self.net_worth - self.prev_net_worth

        # End the episode early if net worth drops to half of the initial balance
        if self.net_worth <= self.initial_balance/2:
            done = True
        else:
            done = False

        obs = self._next_observation()

        return obs, reward, done

    # Render environment
    def render(self):
        print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')


def Random_games(env, train_episodes = 50, training_batch_size=500):
    average_net_worth = 0
    for episode in range(train_episodes):
        state = env.reset(env_steps_size = training_batch_size)

        while True:
            env.render()

            action = np.random.randint(3, size=1)[0]

            state, reward, done = env.step(action)

            if env.current_step == env.end_step:
                average_net_worth += env.net_worth
                print("net_worth:", env.net_worth)
                break

    print("average_net_worth:", average_net_worth/train_episodes)
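
# Note: each episode above runs until env.end_step is reached; the done flag
# returned by step() (net worth falling to half the starting balance) is not
# checked by this random-action loop.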

df = pd.read_csv('./pricedata.csv')  # expects Date, Open, High, Low, Close, Volume columns
df = df.sort_values('Date')

lookback_window_size = 10
train_df = df[:-720-lookback_window_size]
test_df = df[-720-lookback_window_size:] # 30 days

train_env = CustomEnv(train_df, lookback_window_size=lookback_window_size)
test_env = CustomEnv(test_df, lookback_window_size=lookback_window_size)

Random_games(train_env, train_episodes = 10, training_batch_size=500)
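
# Example (an assumption, not part of the committed script): the same random
# policy could also be run on the held-out test split for comparison:
# Random_games(test_env, train_episodes = 10, training_batch_size=500)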
