Skip to content

Commit acd1a59

Browse files
author
Ayush Varshney
committed
Flappy Bird Agent
1 parent 2d6e0a3 commit acd1a59

File tree

4 files changed

+188
-0
lines changed

4 files changed

+188
-0
lines changed

FlappyBird/.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"python.pythonPath": "C:\\Users\\Ayush\\.conda\\envs\\tensorflow\\python.exe"
3+
}

FlappyBird/Flappy.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Jun 6 10:57:01 2021
4+
5+
@author: Ayush
6+
"""
7+
8+
#Import Dependencies
9+
import random
10+
import numpy as np
11+
import flappy_bird_gym
12+
from collections import deque
13+
from tensorflow.keras.layers import Input, Dense
14+
from tensorflow.keras.models import load_model, save_model, Sequential
15+
from tensorflow.keras.optimizers import RMSprop
16+
17+
# Neural Network for Agent
18+
19+
def NeuralNetwork(input_shape, output_shape):
    """Build and compile the fully connected network used by the agent.

    Args:
        input_shape: Shape tuple of one observation, e.g. ``(n_features,)``.
        output_shape: Number of units in the linear output layer (the agent
            passes the size of its action space).

    Returns:
        A compiled ``Sequential`` model: three ReLU hidden layers
        (512, 256, 64 units) followed by a linear output layer, using
        mean-squared-error loss and the RMSprop optimizer.
    """
    model = Sequential()
    # The explicit Input layer already fixes the input shape, so the first
    # Dense layer does not repeat it.
    model.add(Input(input_shape))
    model.add(Dense(512, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(output_shape, activation='linear', kernel_initializer='he_uniform'))
    # ``learning_rate`` replaces the deprecated ``lr`` alias, which recent
    # Keras releases no longer accept.
    model.compile(
        loss='mse',
        optimizer=RMSprop(learning_rate=0.0001, rho=0.95, epsilon=0.01),
        metrics=['accuracy'],
    )
    model.summary()
    return model
29+
30+
31+
#Brain of Agent || BluePrint of Agent
32+
33+
class DQNAgent:
    """Deep Q-learning agent for the ``FlappyBird-v0`` environment.

    The agent keeps a bounded replay memory of transitions, selects actions
    epsilon-greedily, and fits its network on random minibatches drawn from
    that memory.
    """

    def __init__(self):
        """Create the environment, replay memory, hyperparameters and model."""
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        # Hyperparameters
        self.gamma = 0.95            # discount applied to the bootstrapped value
        self.epsilon = 1             # initial exploration rate
        self.epsilon_decay = 0.9999  # multiplicative decay, applied once per episode
        self.epsilon_min = 0.01      # lower bound for epsilon
        self.batch_number = 64       # 16, 32, 128, 256

        self.train_start = 1000      # transitions required before learning starts
        self.jump_prob = 0.01        # probability of action 1 while exploring
        self.model = NeuralNetwork(
            input_shape=(self.state_space,),
            output_shape=self.action_space,
        )

    def act(self, state):
        """Pick an action epsilon-greedily.

        Args:
            state: Observation shaped ``(1, state_space)``.

        Returns:
            The greedy action (argmax of the model output) with probability
            ``1 - epsilon``; otherwise 1 with probability ``jump_prob`` and
            0 the rest of the time.
        """
        if np.random.random() > self.epsilon:
            return np.argmax(self.model.predict(state, verbose=0))
        return 1 if np.random.random() < self.jump_prob else 0

    def learn(self):
        """Fit the model on one random minibatch from the replay memory.

        Does nothing until at least ``train_start`` transitions are stored.
        """
        # Make sure we have enough data
        if len(self.memory) < self.train_start:
            return

        # Create minibatch; it may hold fewer than ``batch_number`` samples,
        # so every buffer below is sized from the actual sample count.
        minibatch = random.sample(self.memory, min(len(self.memory), self.batch_number))
        sample_count = len(minibatch)

        state = np.zeros((sample_count, self.state_space))
        next_state = np.zeros((sample_count, self.state_space))
        action, reward, done = [], [], []

        # Unpack the stored (state, action, reward, next_state, done) tuples
        for row, (s, a, r, s_next, terminal) in enumerate(minibatch):
            state[row] = s
            action.append(a)
            reward.append(r)
            next_state[row] = s_next
            done.append(terminal)

        # Current predictions are the regression targets; only the entry of
        # the action actually taken is overwritten below.
        target = self.model.predict(state, verbose=0)
        target_next = self.model.predict(next_state, verbose=0)

        for row, (a, r, terminal) in enumerate(zip(action, reward, done)):
            if terminal:
                target[row][a] = r
            else:
                # Bootstrap from the best next-state VALUE (amax), not from
                # the index of the best action (argmax).
                target[row][a] = r + self.gamma * np.amax(target_next[row])

        print('training')
        self.model.fit(state, target, batch_size=self.batch_number, verbose=0)

    def train(self):
        """Run ``episodes`` training episodes, learning from replayed steps."""
        # n episode iterations for training
        for i in range(self.episodes):
            # Environment variables for training
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0
            # Decay epsilon once per episode, never below epsilon_min
            self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

            while not done:
                self.env.render()
                action = self.act(state)
                next_state, reward, done, info = self.env.step(action)

                # Reshape next state to the (1, state_space) form the model expects
                next_state = np.reshape(next_state, [1, self.state_space])
                score += 1
                if done:
                    # Penalize the transition that ended the episode
                    reward -= 100

                self.memory.append((state, action, reward, next_state, done))
                state = next_state

                if done:
                    print("Episode: {}\nScore: {}\nEpsilon: {:.2}".format(i, score, self.epsilon))
                    # Save model
                    if score >= 1000:
                        # Keras models expose ``save``; ``save_model`` is a
                        # module-level function, not a model method.
                        self.model.save('flappybrain.h5')
                # NOTE(review): learning is assumed to run once per
                # environment step; confirm against the original layout.
                self.learn()

    def perform(self):
        """Load the saved model and play greedily, forever, printing the score."""
        self.model = load_model('flappybrain.h5')
        while True:
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0

            while not done:
                self.env.render()
                action = np.argmax(self.model.predict(state, verbose=0))
                next_state, reward, done, info = self.env.step(action)
                state = np.reshape(next_state, [1, self.state_space])
                score += 1

                print("Current Score: {}".format(score))

                if done:
                    print('DEAD')
                    break
142+
143+
144+
145+
if __name__ == '__main__':
    # Script entry point: build the agent and train it.
    # To watch a previously saved model play instead, comment out train()
    # and uncomment perform().
    agent = DQNAgent()
    agent.train()
    #agent.perform()
149+

FlappyBird/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# FlappyBird
2+
This repository contains the implementation of two OpenAI Gym environments for the Flappy Bird game. The implementation of the game's logic and graphics was based on the FlapPyBird project.<br>
3+
4+
### In the Flappy.py file, I train a neural network model using a reinforcement learning approach.
5+
## Performance of trained Agent
6+
https://user-images.githubusercontent.com/51924622/121337387-1dca1080-c93a-11eb-9b9f-61bf00bc327f.mp4
7+
8+
<br>
9+
To check the performance and visualize the agent, uncomment agent.perform() and comment out agent.train() in the Flappy.py file. <br>
10+
if __name__ == '__main__':<br>
11+
&nbsp; agent = DQNAgent()<br>
12+
&nbsp; #agent.train()<br>
13+
&nbsp; agent.perform()<br>
14+
15+
16+
### Perform Function:<br>
17+
18+
![Capture1](https://user-images.githubusercontent.com/51924622/121318965-5f9e8b00-c929-11eb-86d3-96ea6abb43b6.PNG)
19+
20+
## Train the Agent
21+
To train the agent, uncomment agent.train() in the Flappy.py file.<br>
22+
if __name__ == '__main__':<br>
23+
&nbsp; agent = DQNAgent()<br>
24+
&nbsp; agent.train()<br>
25+
&nbsp; #agent.perform()<br>
26+
27+
### Train Function:<br>
28+
![Capture](https://user-images.githubusercontent.com/51924622/121318915-52819c00-c929-11eb-9062-cf8e6ce1c795.PNG)<br>
29+
30+
### After training, the model is saved as "flappybrain.h5", which is included in this repository.<br>
31+
## Requirements:
32+
- flappy-bird-gym
33+
- numpy
34+
- tensorflow
35+
### Fork the repository and run the Flappy.py file to see the result.
36+

FlappyBird/flappybrain.h5

1.18 MB
Binary file not shown.

0 commit comments

Comments
 (0)