Binary file added .DS_Store
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# ignore all pyc files.
*.pyc

.DS_Store
.DS_Store
.DS_Store
.DS_Store
20 changes: 5 additions & 15 deletions README.md
@@ -2,30 +2,26 @@

<img src="./images/flappy_bird_demp.gif" width="250">

7-minute version: [DQN for flappy bird](https://www.youtube.com/watch?v=THhUXIhjkCM)

## Overview
This project implements the Deep Q-Learning algorithm described in Playing Atari with Deep Reinforcement Learning [2] and shows that the same algorithm generalizes to the notoriously difficult Flappy Bird.

## Installation Dependencies:
* Python 3
* TensorFlow 0.7
* TensorFlow 2
* pygame
* OpenCV-Python

## How to Run?
```
git clone https://github.com/yenchenlin1994/DeepLearningFlappyBird.git
git clone https://github.com/Anonymous-Ol/DeepLearningFlappyBird-1.git
cd DeepLearningFlappyBird-1
python deep_q_network.py
```

## What is Deep Q-Network?
It is a convolutional neural network, trained with a variant of Q-learning, whose input is raw pixels and whose output is a value function estimating future rewards.

For those who are interested in deep reinforcement learning, I highly recommend reading the following post:

[Demystifying Deep Reinforcement Learning](http://www.nervanasys.com/demystifying-deep-reinforcement-learning/)
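
In plain terms, the network maps a stack of game frames to one Q-value per action; the agent acts (mostly) greedily on those values and trains them toward the one-step Bellman target. A minimal NumPy sketch of those two ideas (illustrative only, not the network code in this repository):

```python
import numpy as np

def epsilon_greedy(q_values, epsilon):
    # With probability epsilon explore with a random action,
    # otherwise exploit the current Q-value estimates.
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))

def bellman_target(reward, q_next, terminal, gamma=0.99):
    # Terminal transitions contribute only the immediate reward;
    # otherwise bootstrap with the discounted best next-state value.
    if terminal:
        return reward
    return reward + gamma * float(np.max(q_next))
```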

## Deep Q-Network Algorithm

@@ -88,16 +84,10 @@ Change [first line of `saved_networks/checkpoint`](https://github.com/yenchenlin

`model_checkpoint_path: "saved_networks/bird-dqn-2920000"`

#### How to reproduce?
1. Comment out [these lines](https://github.com/yenchenlin1994/DeepLearningFlappyBird/blob/master/deep_q_network.py#L108-L112)
#### How to test?
1. Uncomment [these lines](https://github.com/yenchenlin1994/DeepLearningFlappyBird/blob/master/deep_q_network.py#L108-L112)
2. You may want to decrease `EXPLORE` and epsilon, as sketched below.

2. Modify the parameters in `deep_q_network.py` as follows:
```python
OBSERVE = 10000
EXPLORE = 3000000
FINAL_EPSILON = 0.0001
INITIAL_EPSILON = 0.1
```
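
For evaluation with a trained network, "decrease explore and epsilon" could, for example, mean something like the following (illustrative values only, not settings shipped with this repository):

```python
OBSERVE = 100000.         # training does not start until 100k frames, so a short demo never updates the loaded weights
EXPLORE = 2000000.
FINAL_EPSILON = 0.0001    # act greedily almost all the time
INITIAL_EPSILON = 0.0001  # no initial exploration burst
```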

## References

40 changes: 21 additions & 19 deletions deep_q_network.py
100755 → 100644
@@ -14,26 +14,27 @@
ACTIONS = 2 # number of valid actions
GAMMA = 0.99 # discount factor for future rewards
OBSERVE = 100000. # timesteps to observe before training
EXPLORE = 2000000. # frames over which to anneal epsilon
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.0001 # starting value of epsilon
INITIAL_EPSILON = 0.1 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1
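# During training epsilon is annealed linearly from INITIAL_EPSILON down to
# FINAL_EPSILON over the EXPLORE frames that follow the OBSERVE phase.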


def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev = 0.01)
initial = tf.random.truncated_normal(shape, stddev = 0.01)
return tf.Variable(initial)

def bias_variable(shape):
initial = tf.constant(0.01, shape = shape)
return tf.Variable(initial)

def conv2d(x, W, stride):
return tf.nn.conv2d(x, W, strides = [1, stride, stride, 1], padding = "SAME")
return tf.nn.conv2d(input=x, filters=W, strides = [1, stride, stride, 1], padding = "SAME")

def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")
return tf.nn.max_pool2d(input=x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")

def createNetwork():
# network weights
@@ -53,7 +54,7 @@ def createNetwork():
b_fc2 = bias_variable([ACTIONS])

# input layer
s = tf.placeholder("float", [None, 80, 80, 4])
s = tf.compat.v1.placeholder("float", [None, 80, 80, 4])

# hidden layers
h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
@@ -77,11 +78,11 @@

def trainNetwork(s, readout, h_fc1, sess):
# define the cost function
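# a is fed as a one-hot mask over the actions; multiplying it with the network
# output and summing over the action axis extracts Q(s, a_taken), which is then
# regressed toward the Bellman target supplied through the placeholder y.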
a = tf.placeholder("float", [None, ACTIONS])
y = tf.placeholder("float", [None])
readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
cost = tf.reduce_mean(tf.square(y - readout_action))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
a = tf.compat.v1.placeholder("float", [None, ACTIONS])
y = tf.compat.v1.placeholder("float", [None])
readout_action = tf.reduce_sum(input_tensor=tf.multiply(readout, a), axis=1)
cost = tf.reduce_mean(input_tensor=tf.square(y - readout_action))
train_step = tf.compat.v1.train.AdamOptimizer(1e-6).minimize(cost)

# open up a game state to communicate with emulator
game_state = game.GameState()
@@ -102,14 +103,14 @@ def trainNetwork(s, readout, h_fc1, sess):
s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

# saving and loading networks
saver = tf.train.Saver()
sess.run(tf.initialize_all_variables())
saver = tf.compat.v1.train.Saver()
sess.run(tf.compat.v1.initialize_all_variables())
checkpoint = tf.train.get_checkpoint_state("saved_networks")
if checkpoint and checkpoint.model_checkpoint_path:
saver.restore(sess, checkpoint.model_checkpoint_path)
print("Successfully loaded:", checkpoint.model_checkpoint_path)
else:
print("Could not find old network weights")
#if checkpoint and checkpoint.model_checkpoint_path:
#saver.restore(sess, checkpoint.model_checkpoint_path)
#print("Successfully loaded:", checkpoint.model_checkpoint_path)
#else:
#print("Could not find old network weights")

# start training
epsilon = INITIAL_EPSILON
@@ -204,11 +205,12 @@ def trainNetwork(s, readout, h_fc1, sess):
'''

def playGame():
sess = tf.InteractiveSession()
sess = tf.compat.v1.InteractiveSession()
s, readout, h_fc1 = createNetwork()
trainNetwork(s, readout, h_fc1, sess)

def main():
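# The graph-mode API used above (placeholders, Session, Saver) only works in TF2
# once eager execution has been disabled.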
tf.compat.v1.disable_eager_execution()
playGame()

if __name__ == "__main__":
4 changes: 2 additions & 2 deletions game/wrapped_flappy_bird.py
@@ -7,7 +7,7 @@
from pygame.locals import *
from itertools import cycle

FPS = 30
FPS = 4000
SCREENWIDTH = 288
SCREENHEIGHT = 512

@@ -139,7 +139,7 @@ def frame_step(self, input_actions):
(self.playerx, self.playery))

image_data = pygame.surfarray.array3d(pygame.display.get_surface())
pygame.display.update()
#pygame.display.update()
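# Skipping the on-screen update (together with the raised FPS cap) lets the emulator
# generate frames as fast as possible during training; re-enable it to watch the game.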
FPSCLOCK.tick(FPS)
#print self.upperPipes[0]['y'] + PIPE_HEIGHT - int(BASEY * 0.2)
return image_data, reward, terminal
37 changes: 37 additions & 0 deletions report.txt
@@ -0,0 +1,37 @@
TensorFlow 2.0 Upgrade Script
-----------------------------
Converted 1 files
Detected 1 issues that require attention
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
File: deep_q_network.py
--------------------------------------------------------------------------------
deep_q_network.py:184:12: WARNING: *.save requires manual check. (This warning is only applicable if the code saves a tf.Keras model) Keras model.save now saves to the Tensorflow SavedModel format by default, instead of HDF5. To continue saving to HDF5, add the argument save_format='h5' to the save() function.
================================================================================
Detailed log follows:

================================================================================
--------------------------------------------------------------------------------
Processing file 'deep_q_network.py'
outputting to 'deep_q_network_updated.py'
--------------------------------------------------------------------------------

25:14: INFO: Renamed 'tf.truncated_normal' to 'tf.random.truncated_normal'
33:11: INFO: Added keywords to args of function 'tf.nn.conv2d'
33:11: INFO: Renamed keyword argument for tf.nn.conv2d from filter to filters
36:11: INFO: Added keywords to args of function 'tf.nn.max_pool'
36:11: INFO: Renamed keyword argument for tf.nn.max_pool from value to input
36:11: INFO: Renamed 'tf.nn.max_pool' to 'tf.nn.max_pool2d'
56:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
80:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
81:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
82:21: INFO: Added keywords to args of function 'tf.reduce_sum'
82:21: INFO: Renamed keyword argument for tf.reduce_sum from reduction_indices to axis
83:11: INFO: Added keywords to args of function 'tf.reduce_mean'
84:17: INFO: Renamed 'tf.train.AdamOptimizer' to 'tf.compat.v1.train.AdamOptimizer'
105:12: INFO: Renamed 'tf.train.Saver' to 'tf.compat.v1.train.Saver'
106:13: INFO: Renamed 'tf.initialize_all_variables' to 'tf.compat.v1.initialize_all_variables'
184:12: WARNING: *.save requires manual check. (This warning is only applicable if the code saves a tf.Keras model) Keras model.save now saves to the Tensorflow SavedModel format by default, instead of HDF5. To continue saving to HDF5, add the argument save_format='h5' to the save() function.
207:11: INFO: Renamed 'tf.InteractiveSession' to 'tf.compat.v1.InteractiveSession'
--------------------------------------------------------------------------------
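
This report is the standard output of TensorFlow's `tf_upgrade_v2` conversion script. Assuming the file names that appear in the log above, the invocation would have looked roughly like:

```
tf_upgrade_v2 --infile deep_q_network.py --outfile deep_q_network_updated.py --reportfile report.txt
```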

Binary file removed saved_networks/bird-dqn-2880000
Binary file removed saved_networks/bird-dqn-2880000.meta
Binary file removed saved_networks/bird-dqn-2890000
Binary file removed saved_networks/bird-dqn-2890000.meta
Binary file removed saved_networks/bird-dqn-2900000
Binary file removed saved_networks/bird-dqn-2900000.meta
Binary file removed saved_networks/bird-dqn-2910000
Binary file removed saved_networks/bird-dqn-2910000.meta
Binary file removed saved_networks/bird-dqn-2920000
Binary file removed saved_networks/bird-dqn-2920000.meta
6 changes: 0 additions & 6 deletions saved_networks/checkpoint

This file was deleted.

Binary file removed saved_networks/pretrained_model/bird-dqn-policy