# -*- coding: utf-8 -*-
"""
Created on Sun Jun 6 10:57:01 2021

@author: Ayush
"""

# Import dependencies
import random
import numpy as np
import flappy_bird_gym
from collections import deque
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.optimizers import RMSprop

# Neural network for the agent: maps a state vector to one Q-value per action

def NeuralNetwork(input_shape, output_shape):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(512, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(output_shape, activation='linear', kernel_initializer='he_uniform'))
    model.compile(loss='mse',
                  optimizer=RMSprop(learning_rate=0.0001, rho=0.95, epsilon=0.01))
    model.summary()
    return model
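
# Note: for FlappyBird-v0 as exposed by flappy_bird_gym, the observation is a
# 2-vector (horizontal distance to the next pipe and vertical distance to its
# gap) and there are two actions (0 = do nothing, 1 = flap), so the network
# built by DQNAgent below is roughly 2 -> 512 -> 256 -> 64 -> 2.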


# Brain of the agent / blueprint of the agent

class DQNAgent:
    def __init__(self):
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        # Hyperparameters
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.01
        self.batch_number = 64  # alternatives: 16, 32, 128, 256

        self.train_start = 1000
        self.jump_prob = 0.01
        self.model = NeuralNetwork(input_shape=(self.state_space,),
                                   output_shape=self.action_space)

    def act(self, state):
        # Exploit: pick the action with the highest predicted Q-value
        if np.random.random() > self.epsilon:
            return np.argmax(self.model.predict(state))
        # Explore: flap (1) only rarely, otherwise do nothing (0)
        return 1 if np.random.random() < self.jump_prob else 0
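
    # With jump_prob = 0.01, a random exploration step flaps only about 1% of
    # the time. This asymmetry is deliberate: flapping on most frames would
    # drive the bird straight into the ceiling, so uniformly random actions
    # would rarely survive long enough to generate useful transitions.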

    def learn(self):
        # Make sure we have enough data
        if len(self.memory) < self.train_start:
            return

        # Sample a minibatch of transitions from replay memory
        minibatch = random.sample(self.memory, min(len(self.memory), self.batch_number))

        # Variables to store minibatch info
        state = np.zeros((self.batch_number, self.state_space))
        next_state = np.zeros((self.batch_number, self.state_space))
        action, reward, done = [], [], []

        # Unpack the sampled transitions
        for i in range(self.batch_number):
            state[i] = minibatch[i][0]
            action.append(minibatch[i][1])
            reward.append(minibatch[i][2])
            next_state[i] = minibatch[i][3]
            done.append(minibatch[i][4])

        # Predict Q-values for the current and next states (the y labels)
        target = self.model.predict(state)
        target_next = self.model.predict(next_state)

        # Bellman update: a terminal transition keeps the raw reward; otherwise
        # bootstrap from the best next-state Q-value (np.amax, not np.argmax,
        # which would use the best action's index instead of its value)
        for i in range(self.batch_number):
            if done[i]:
                target[i][action[i]] = reward[i]
            else:
                target[i][action[i]] = reward[i] + self.gamma * np.amax(target_next[i])

        print('training')
        self.model.fit(state, target, batch_size=self.batch_number, verbose=0)
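
    # Worked example of the update above: with gamma = 0.95, a step reward of
    # 1 and max_a' Q(s', a') = 10, the regression target for the taken action
    # is 1 + 0.95 * 10 = 10.5. The untaken action keeps the model's own
    # prediction, so the MSE loss only moves the Q-value that was acted on.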

    def train(self):
        # Iterate over n training episodes
        for i in range(self.episodes):
            # Environment variables for training
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0
            # Decay epsilon once per episode, clamped at epsilon_min
            self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

            while not done:
                self.env.render()
                action = self.act(state)
                next_state, reward, done, info = self.env.step(action)

                # Reshape next_state for the network
                next_state = np.reshape(next_state, [1, self.state_space])
                score += 1
                if done:
                    # Penalize dying so the terminal transition is strongly negative
                    reward -= 100

                self.memory.append((state, action, reward, next_state, done))
                state = next_state

                if done:
                    print("Episode: {}\nScore: {}\nEpsilon: {:.2}".format(i, score, self.epsilon))
                    # Save the model once it survives long enough
                    if score >= 1000:
                        self.model.save('flappybrain.h5')

                self.learn()
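
    # Note on the schedule above: epsilon decays once per episode, so after
    # all 1000 episodes it is still 0.9999 ** 1000 ~= 0.90, i.e. the agent
    # acts mostly at random when training ends. A faster decay or many more
    # episodes are needed before greedy, Q-driven behavior dominates.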

    def perform(self):
        # Load the trained model and play greedily, with no exploration
        self.model = load_model('flappybrain.h5')
        while True:
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0

            while not done:
                self.env.render()
                action = np.argmax(self.model.predict(state))
                next_state, reward, done, info = self.env.step(action)
                state = np.reshape(next_state, [1, self.state_space])
                score += 1

                print("Current Score: {}".format(score))

                if done:
                    print('DEAD')
                    break


if __name__ == '__main__':
    agent = DQNAgent()
    agent.train()
    # agent.perform()
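
# Typical usage, given the code above: run this file to train (the call to
# agent.train() is active); once a model has been saved as 'flappybrain.h5',
# swap the comments so agent.perform() runs instead and watch the saved
# policy play.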