# -*- coding: utf-8 -*-
"""
Created on Sun Jun 6 10:57:01 2021

@author: Ayush
"""

# Import dependencies
import random
import numpy as np
import flappy_bird_gym
from collections import deque
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.optimizers import RMSprop

# Neural network for the agent: maps a state vector to one Q-value per action

def NeuralNetwork(input_shape, output_shape):
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Dense(512, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(64, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(output_shape, activation='linear', kernel_initializer='he_uniform'))
    model.compile(loss='mse',
                  optimizer=RMSprop(learning_rate=0.0001, rho=0.95, epsilon=0.01))
    model.summary()
    return model
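
# Note: for FlappyBird-v0 as exposed by flappy_bird_gym, the observation is a
# 2-vector (horizontal distance to the next pipe and vertical distance to its
# gap) and there are two actions (0 = do nothing, 1 = flap), so the network
# built by DQNAgent below is roughly 2 -> 512 -> 256 -> 64 -> 2.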


# Brain of the agent / blueprint of the agent

class DQNAgent:
    def __init__(self):
        self.env = flappy_bird_gym.make("FlappyBird-v0")
        self.episodes = 1000
        self.state_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space.n
        self.memory = deque(maxlen=2000)

        # Hyperparameters
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.01
        self.batch_number = 64  # alternatives: 16, 32, 128, 256

        self.train_start = 1000
        self.jump_prob = 0.01
        self.model = NeuralNetwork(input_shape=(self.state_space,),
                                   output_shape=self.action_space)

    def act(self, state):
        # Exploit: pick the action with the highest predicted Q-value
        if np.random.random() > self.epsilon:
            return np.argmax(self.model.predict(state))
        # Explore: flap (1) only rarely, otherwise do nothing (0)
        return 1 if np.random.random() < self.jump_prob else 0
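
    # With jump_prob = 0.01, a random exploration step flaps only about 1% of
    # the time. This asymmetry is deliberate: flapping on most frames would
    # drive the bird straight into the ceiling, so uniformly random actions
    # would rarely survive long enough to generate useful transitions.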

    def learn(self):
        # Make sure we have enough data
        if len(self.memory) < self.train_start:
            return

        # Sample a minibatch of transitions from replay memory
        minibatch = random.sample(self.memory, min(len(self.memory), self.batch_number))

        # Variables to store minibatch info
        state = np.zeros((self.batch_number, self.state_space))
        next_state = np.zeros((self.batch_number, self.state_space))
        action, reward, done = [], [], []

        # Unpack the sampled transitions
        for i in range(self.batch_number):
            state[i] = minibatch[i][0]
            action.append(minibatch[i][1])
            reward.append(minibatch[i][2])
            next_state[i] = minibatch[i][3]
            done.append(minibatch[i][4])

        # Predict Q-values for the current and next states (the y labels)
        target = self.model.predict(state)
        target_next = self.model.predict(next_state)

        # Bellman update: a terminal transition keeps the raw reward; otherwise
        # bootstrap from the best next-state Q-value (np.amax, not np.argmax,
        # which would use the best action's index instead of its value)
        for i in range(self.batch_number):
            if done[i]:
                target[i][action[i]] = reward[i]
            else:
                target[i][action[i]] = reward[i] + self.gamma * np.amax(target_next[i])

        print('training')
        self.model.fit(state, target, batch_size=self.batch_number, verbose=0)
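
    # Worked example of the update above: with gamma = 0.95, a step reward of
    # 1 and max_a' Q(s', a') = 10, the regression target for the taken action
    # is 1 + 0.95 * 10 = 10.5. The untaken action keeps the model's own
    # prediction, so the MSE loss only moves the Q-value that was acted on.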

    def train(self):
        # Iterate over n training episodes
        for i in range(self.episodes):
            # Environment variables for training
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0
            # Decay epsilon once per episode, clamped at epsilon_min
            self.epsilon = max(self.epsilon * self.epsilon_decay, self.epsilon_min)

            while not done:
                self.env.render()
                action = self.act(state)
                next_state, reward, done, info = self.env.step(action)

                # Reshape next_state for the network
                next_state = np.reshape(next_state, [1, self.state_space])
                score += 1
                if done:
                    # Penalize dying so the terminal transition is strongly negative
                    reward -= 100

                self.memory.append((state, action, reward, next_state, done))
                state = next_state

                if done:
                    print("Episode: {}\nScore: {}\nEpsilon: {:.2}".format(i, score, self.epsilon))
                    # Save the model once it survives long enough
                    if score >= 1000:
                        self.model.save('flappybrain.h5')

                self.learn()
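
    # Note on the schedule above: epsilon decays once per episode, so after
    # all 1000 episodes it is still 0.9999 ** 1000 ~= 0.90, i.e. the agent
    # acts mostly at random when training ends. A faster decay or many more
    # episodes are needed before greedy, Q-driven behavior dominates.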

    def perform(self):
        # Load the trained model and play greedily, with no exploration
        self.model = load_model('flappybrain.h5')
        while True:
            state = self.env.reset()
            state = np.reshape(state, [1, self.state_space])
            done = False
            score = 0

            while not done:
                self.env.render()
                action = np.argmax(self.model.predict(state))
                next_state, reward, done, info = self.env.step(action)
                state = np.reshape(next_state, [1, self.state_space])
                score += 1

                print("Current Score: {}".format(score))

                if done:
                    print('DEAD')
                    break


if __name__ == '__main__':
    agent = DQNAgent()
    agent.train()
    # agent.perform()
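
# Typical usage, given the code above: run this file to train (the call to
# agent.train() is active); once a model has been saved as 'flappybrain.h5',
# swap the comments so agent.perform() runs instead and watch the saved
# policy play.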