Skip to content

Commit ac4d5e4

Browse files
Merge pull request #1613 from ayush-09/main
Flappy Bird Game Agent
2 parents 9441705 + acd1a59 commit ac4d5e4

File tree

5 files changed

+302
-0
lines changed

5 files changed

+302
-0
lines changed

AI TicTacToe/src/TicTacToe.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import random
2+
3+
def display_board(board):
    """Print the 3x3 grid laid out like a numeric keypad.

    Cells 7-8-9 form the top row, 4-5-6 the middle row and 1-2-3 the
    bottom row; index 0 of ``board`` is never shown.
    """
    divider = '-------------'
    print(divider)
    for first_cell in (7, 4, 1):
        cells = (board[first_cell], board[first_cell + 1], board[first_cell + 2])
        print('| ' + ' | '.join(cells) + ' |')
        print(divider)
11+
12+
13+
def player_input():
    """Ask Player 1 for a marker until 'X' or 'O' is entered.

    The answer is upper-cased before it is checked.

    Returns:
        A ``(player1, player2)`` tuple where Player 2 gets the marker
        Player 1 did not pick.
    """
    opposite = {'X': 'O', 'O': 'X'}
    chosen = ''
    while chosen not in opposite:
        chosen = input('Player 1, choose X or O: ').upper()
    return chosen, opposite[chosen]
20+
21+
22+
def place_marker(board, marker, position):
    """Store ``marker`` at index ``position`` of ``board`` (mutates ``board`` in place)."""
    board[position] = marker
24+
25+
26+
def win_check(board, mark):
    """Return True when ``mark`` fills any row, column or diagonal of ``board``."""
    lines = (
        (1, 2, 3), (4, 5, 6), (7, 8, 9),  # rows
        (1, 4, 7), (2, 5, 8), (3, 6, 9),  # columns
        (1, 5, 9), (3, 5, 7),             # diagonals
    )
    for a, b, c in lines:
        if board[a] == mark and board[b] == mark and board[c] == mark:
            return True
    return False
33+
34+
35+
def choose_first():
    """Pick at random which player moves first: 'Player 1' or 'Player 2'."""
    contenders = ['Player 1', 'Player 2']
    return random.choice(contenders)
37+
38+
39+
def space_check(board, position):
    """Return True when the cell at ``position`` still holds the blank marker ' '."""
    occupant = board[position]
    return occupant == ' '
41+
42+
43+
def full_board_check(board):
    """Return True when none of the cells 1 through 9 is blank (index 0 is ignored)."""
    for cell in range(1, 10):
        if board[cell] == ' ':
            return False
    return True
45+
46+
47+
def player_choice(board):
    """Prompt until the user names a free cell between 1 and 9.

    Non-numeric answers are treated like any other invalid answer and
    simply cause another prompt instead of crashing with ``ValueError``.

    Args:
        board: The current board; passed to ``space_check`` to reject
            cells that are already taken.

    Returns:
        The chosen position as an ``int`` in the range 1-9.
    """
    while True:
        answer = input('Choose a position (1-9): ')
        try:
            position = int(answer)
        except ValueError:
            # Not a number at all (e.g. "a" or an empty line): ask again.
            continue
        if position in range(1, 10) and space_check(board, position):
            return position
52+
53+
54+
def replay():
    """Ask whether to play another round.

    The answer is compared case-insensitively after surrounding whitespace
    is removed. Both "yes" and the short form "y" count as agreement, since
    the ready prompt shown earlier in the game teaches the user to answer
    with "y"/"n".

    Returns:
        True if the user wants to play again, False otherwise.
    """
    choice = input('Do you want to play again? Enter Yes or No: ')
    return choice.strip().lower() in ('yes', 'y')
57+
58+
59+
def play_tic_tac_toe():
    """Run the interactive two-player game until the players decline a replay.

    Each round: reset the board, let Player 1 pick a marker, randomly pick
    who starts, then alternate turns until someone wins or the board is
    full. If the user answers anything but "y" to the ready prompt, the
    round is skipped and the replay question is asked straight away.
    """
    print('Welcome to Tic Tac Toe!')
    while True:
        the_board = [' '] * 10
        player1_marker, player2_marker = player_input()
        turn = choose_first()
        print(turn + ' will go first.')
        play_game = input('Are you ready to play? Enter y or n: ')
        game_on = play_game.lower() == 'y'

        while game_on:
            # Anything other than 'Player 1' is handled as Player 2's turn.
            if turn == 'Player 1':
                marker, win_text, next_turn = player1_marker, 'Player 1 has won!', 'Player 2'
            else:
                marker, win_text, next_turn = player2_marker, 'Player 2 has won!', 'Player 1'

            display_board(the_board)
            position = player_choice(the_board)
            place_marker(the_board, marker, position)

            if win_check(the_board, marker):
                display_board(the_board)
                print(win_text)
                game_on = False
            elif full_board_check(the_board):
                display_board(the_board)
                print('TIE GAME!')
                game_on = False
            else:
                turn = next_turn

        if replay():
            continue

        # Different farewell depending on whether a round was declined.
        if play_game.lower() == 'n':
            print('BYE! Have a good day.')
        else:
            print('Thank you for playing.')
        break
112+
113+
# Start the game only when this file is executed as a script, so that
# importing the module does not block on input().
if __name__ == '__main__':
    play_tic_tac_toe()

FlappyBird/.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"python.pythonPath": "C:\\Users\\Ayush\\.conda\\envs\\tensorflow\\python.exe"
3+
}

FlappyBird/Flappy.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sun Jun 6 10:57:01 2021
4+
5+
@author: Ayush
6+
"""
7+
8+
#Import Dependencies
9+
import random
10+
import numpy as np
11+
import flappy_bird_gym
12+
from collections import deque
13+
from tensorflow.keras.layers import Input, Dense
14+
from tensorflow.keras.models import load_model, save_model, Sequential
15+
from tensorflow.keras.optimizers import RMSprop
16+
17+
# Neural Network for Agent
18+
19+
def NeuralNetwork(input_shape, output_shape):
    """Build and compile the fully connected network used by the agent.

    The network is ``Input -> Dense(512) -> Dense(256) -> Dense(64) ->
    Dense(output_shape)``; the hidden layers use ReLU, the output layer is
    linear, and all layers use the ``he_uniform`` initializer. It is
    compiled with mean-squared-error loss and RMSprop, and its summary is
    printed before it is returned.

    Args:
        input_shape: Shape tuple passed to the ``Input`` layer.
        output_shape: Number of units in the final ``Dense`` layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # The Input layer already fixes the input shape, so the first Dense
    # layer does not repeat it.
    model.add(Input(input_shape))
    model.add(Dense(512, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(output_shape, activation='linear', kernel_initializer='he_uniform'))
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(
        loss='mse',
        optimizer=RMSprop(learning_rate=0.0001, rho=0.95, epsilon=0.01),
        metrics=['accuracy'],
    )
    model.summary()
    return model
29+
30+
31+
#Brain of Agent || BluePrint of Agent
32+
33+
class DQNAgent:
    """Deep Q-learning agent for the ``FlappyBird-v0`` environment.

    The agent keeps a bounded replay memory of
    ``(state, action, reward, next_state, done)`` tuples, explores with an
    epsilon-greedy policy and fits its network on random minibatches drawn
    from that memory.
    """

    def __init__(self):
        """Create the environment, the hyperparameters and the network."""
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        # Hyperparameters
        self.gamma = 0.95
        self.epsilon = 1
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.01
        self.batch_number = 64  # 16, 32, 128, 256

        # Learning starts only once this many transitions are stored.
        self.train_start = 1000
        # Probability of choosing action 1 when acting randomly
        # (presumably the "flap" action -- confirm against the env).
        self.jump_prob = 0.01
        self.model = NeuralNetwork(input_shape=(self.state_space,), output_shape=self.action_space)

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        With probability ``1 - epsilon`` the action with the highest
        predicted value is returned; otherwise action 1 is returned with
        probability ``jump_prob`` and action 0 the rest of the time.
        """
        if np.random.random() > self.epsilon:
            return np.argmax(self.model.predict(state))
        return 1 if np.random.random() < self.jump_prob else 0

    def _bellman_targets(self, target, target_next, action, reward, done):
        """Overwrite, in place, the taken-action entries of ``target``.

        For terminal transitions the target is the reward itself; otherwise
        it is ``reward + gamma * max(Q(next_state))``. The maximum Q-value
        (``np.amax``) is used, not the index of the best action.

        Returns:
            The same ``target`` array, for convenience.
        """
        for i, (taken, gained, terminal) in enumerate(zip(action, reward, done)):
            if terminal:
                target[i][taken] = gained
            else:
                target[i][taken] = gained + self.gamma * np.amax(target_next[i])
        return target

    def learn(self):
        """Fit the network on one random minibatch from the replay memory.

        Does nothing until at least ``train_start`` transitions are stored.
        """
        # Make sure we have enough data
        if len(self.memory) < self.train_start:
            return

        # Create minibatch
        minibatch = random.sample(self.memory, min(len(self.memory), self.batch_number))
        # Size the arrays from the actual sample so a small memory can never
        # cause an out-of-range index.
        batch_size = len(minibatch)

        # Variables to store minibatch info
        state = np.zeros((batch_size, self.state_space))
        next_state = np.zeros((batch_size, self.state_space))
        action, reward, done = [], [], []

        # Store data in variables
        for i, (s, a, r, s_next, terminal) in enumerate(minibatch):
            state[i] = s
            action.append(a)
            reward.append(r)
            next_state[i] = s_next
            done.append(terminal)

        # Predict y label
        target = self.model.predict(state)
        target_next = self.model.predict(next_state)
        self._bellman_targets(target, target_next, action, reward, done)

        print('training')
        self.model.fit(state, target, batch_size=batch_size, verbose=0)

    def train(self):
        """Play ``episodes`` episodes, storing transitions and learning each step.

        Epsilon is decayed once per episode and never drops below
        ``epsilon_min``. A terminal step gets an extra -100 reward. The
        model is saved to 'flappybrain.h5' when an episode lasts at least
        1000 steps.
        """
        # n episode iterations for training
        for i in range(self.episodes):
            # Environment variables for training
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0
            self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

            while not done:
                self.env.render()
                action = self.act(state)
                next_state, reward, done, _info = self.env.step(action)

                # Reshape next state
                next_state = np.reshape(next_state, [1, self.state_space])
                score += 1
                if done:
                    reward -= 100

                self.memory.append((state, action, reward, next_state, done))
                state = next_state

                if done:
                    print("Episode: {}\nScore: {}\nEpsilon: {:.2}".format(i, score, self.epsilon))
                    # Save model
                    if score >= 1000:
                        # Keras models expose save(); there is no
                        # Model.save_model() method.
                        self.model.save('flappybrain.h5')
                self.learn()

    def perform(self):
        """Load 'flappybrain.h5' and play greedily, episode after episode, forever."""
        self.model = load_model('flappybrain.h5')
        while 1:
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0

            while not done:
                self.env.render()
                action = np.argmax(self.model.predict(state))
                next_state, _reward, done, _info = self.env.step(action)
                state = np.reshape(next_state, [1, self.state_space])
                score += 1

                print("Current Score: {}".format(score))

                if done:
                    print('DEAD')
                    break
142+
143+
144+
145+
if __name__ == '__main__':
    # Default entry point: build the agent and train it.
    agent = DQNAgent()
    agent.train()
    # To watch a saved model play instead, comment out train() above and
    # uncomment the line below.
    #agent.perform()
149+

FlappyBird/README.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# FlappyBird
2+
This repository contains the implementation of two OpenAI Gym environments for the Flappy Bird game. The implementation of the game's logic and graphics was based on the FlapPyBird project.<br>
3+
4+
### In the Flappy.py file, I train the neural network model with a reinforcement learning approach.
5+
## Performance of trained Agent
6+
https://user-images.githubusercontent.com/51924622/121337387-1dca1080-c93a-11eb-9b9f-61bf00bc327f.mp4
7+
8+
<br>
9+
To check the performance and visualize the agent, uncomment agent.perform() and comment out agent.train() in the Flappy.py file. <br>
10+
if __name__ == '__main__':<br>
11+
&nbsp; agent = DQNAgent()<br>
12+
&nbsp; #agent.train()<br>
13+
&nbsp; agent.perform()<br>
14+
15+
16+
### Perform Function:<br>
17+
18+
![Capture1](https://user-images.githubusercontent.com/51924622/121318965-5f9e8b00-c929-11eb-86d3-96ea6abb43b6.PNG)
19+
20+
## Train the Agent
21+
To train the agent, uncomment agent.train() in the Flappy.py file.<br>
22+
if __name__ == '__main__':<br>
23+
&nbsp; agent = DQNAgent()<br>
24+
&nbsp; agent.train()<br>
25+
&nbsp; #agent.perform()<br>
26+
27+
### Train Function:<br>
28+
![Capture](https://user-images.githubusercontent.com/51924622/121318915-52819c00-c929-11eb-9062-cf8e6ce1c795.PNG)<br>
29+
30+
### After training, the model is saved under the name "flappybrain.h5", which is included in this repository.<br>
31+
## Requirements:
32+
- flappy-bird-gym
33+
- numpy
34+
- tensorflow
35+
### Fork the repository and run the Flappy.py file to see the result.
36+

FlappyBird/flappybrain.h5

1.18 MB
Binary file not shown.

0 commit comments

Comments
 (0)