diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..42948d5
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index 4a61df8..736feea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
 # ignore all pyc files.
 *.pyc
+.DS_Store
+.DS_Store
+.DS_Store
+.DS_Store
diff --git a/README.md b/README.md
index e7336ab..820af6b 100644
--- a/README.md
+++ b/README.md
@@ -2,20 +2,19 @@
 
-7 mins version: [DQN for flappy bird](https://www.youtube.com/watch?v=THhUXIhjkCM)
 
 ## Overview
 This project follows the description of the Deep Q Learning algorithm described in Playing Atari with Deep Reinforcement Learning [2] and shows that this learning algorithm can be further generalized to the notorious Flappy Bird.
 
 ## Installation Dependencies:
 * Python 2.7 or 3
-* TensorFlow 0.7
+* TensorFlow 2
 * pygame
 * OpenCV-Python
 
 ## How to Run?
 ```
-git clone https://github.com/yenchenlin1994/DeepLearningFlappyBird.git
-cd DeepLearningFlappyBird
+git clone https://github.com/Anonymous-Ol/DeepLearningFlappyBird-1.git
+cd DeepLearningFlappyBird-1
 python deep_q_network.py
 ```
@@ -23,9 +22,6 @@ python deep_q_network.py
 
 ## What is Deep Q-Network?
 It is a convolutional neural network, trained with a variant of Q-learning, whose input is raw pixels and whose output is a value function estimating future rewards.
 
-For those who are interested in deep reinforcement learning, I highly recommend to read the following post:
-
-[Demystifying Deep Reinforcement Learning](http://www.nervanasys.com/demystifying-deep-reinforcement-learning/)
 
 ## Deep Q-Network Algorithm
@@ -88,16 +84,10 @@ Change [first line of `saved_networks/checkpoint`](https://github.com/yenchenlin
 `model_checkpoint_path: "saved_networks/bird-dqn-2920000"`
 
-#### How to reproduce?
-1. Comment out [these lines](https://github.com/yenchenlin1994/DeepLearningFlappyBird/blob/master/deep_q_network.py#L108-L112)
+#### How to test?
+1. Uncomment [these lines](https://github.com/yenchenlin1994/DeepLearningFlappyBird/blob/master/deep_q_network.py#L108-L112)
+2. You may want to decrease `EXPLORE` and the epsilon values.
-2. Modify `deep_q_network.py`'s parameter as follow:
-```python
-OBSERVE = 10000
-EXPLORE = 3000000
-FINAL_EPSILON = 0.0001
-INITIAL_EPSILON = 0.1
-```
 
 ## References
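The README's new "How to test?" step says to decrease `EXPLORE` and the epsilon values. To make that knob concrete, the sketch below shows a linear epsilon anneal driven by the constants defined in deep_q_network.py; the per-step update inside `trainNetwork` is not part of this patch, so the schedule here is an assumption rather than a copy of the repository's code.

```python
# Assumed linear epsilon schedule implied by the constants in deep_q_network.py.
OBSERVE = 100000.        # steps of pure observation; epsilon stays at its initial value
EXPLORE = 3000000.       # steps over which epsilon is annealed down
INITIAL_EPSILON = 0.1    # exploration rate when training starts
FINAL_EPSILON = 0.0001   # exploration rate once annealing finishes

def epsilon_at(t):
    """Exploration rate at timestep t under a linear anneal."""
    if t <= OBSERVE:
        return INITIAL_EPSILON
    fraction = min((t - OBSERVE) / EXPLORE, 1.0)
    return INITIAL_EPSILON - fraction * (INITIAL_EPSILON - FINAL_EPSILON)

print(epsilon_at(0), epsilon_at(1_600_000), epsilon_at(5_000_000))
```

Lowering `EXPLORE` or the epsilon constants makes the agent act greedily sooner, which is what the testing advice is aiming for.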
diff --git a/deep_q_network.py b/deep_q_network.py
old mode 100755
new mode 100644
index 1294f96..dab4f7d
--- a/deep_q_network.py
+++ b/deep_q_network.py
@@ -14,15 +14,16 @@
 ACTIONS = 2 # number of valid actions
 GAMMA = 0.99 # decay rate of past observations
 OBSERVE = 100000. # timesteps to observe before training
-EXPLORE = 2000000. # frames over which to anneal epsilon
+EXPLORE = 3000000. # frames over which to anneal epsilon
 FINAL_EPSILON = 0.0001 # final value of epsilon
-INITIAL_EPSILON = 0.0001 # starting value of epsilon
+INITIAL_EPSILON = 0.1 # starting value of epsilon
 REPLAY_MEMORY = 50000 # number of previous transitions to remember
 BATCH = 32 # size of minibatch
 FRAME_PER_ACTION = 1
 
+
 def weight_variable(shape):
-    initial = tf.truncated_normal(shape, stddev = 0.01)
+    initial = tf.random.truncated_normal(shape, stddev = 0.01)
     return tf.Variable(initial)
 
 def bias_variable(shape):
@@ -30,10 +31,10 @@ def bias_variable(shape):
     return tf.Variable(initial)
 
 def conv2d(x, W, stride):
-    return tf.nn.conv2d(x, W, strides = [1, stride, stride, 1], padding = "SAME")
+    return tf.nn.conv2d(input=x, filters=W, strides = [1, stride, stride, 1], padding = "SAME")
 
 def max_pool_2x2(x):
-    return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")
+    return tf.nn.max_pool2d(input=x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")
 
 def createNetwork():
     # network weights
@@ -53,7 +54,7 @@ def createNetwork():
     b_fc2 = bias_variable([ACTIONS])
 
     # input layer
-    s = tf.placeholder("float", [None, 80, 80, 4])
+    s = tf.compat.v1.placeholder("float", [None, 80, 80, 4])
 
     # hidden layers
     h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
@@ -77,11 +78,11 @@ def createNetwork():
 
 def trainNetwork(s, readout, h_fc1, sess):
     # define the cost function
-    a = tf.placeholder("float", [None, ACTIONS])
-    y = tf.placeholder("float", [None])
-    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
-    cost = tf.reduce_mean(tf.square(y - readout_action))
-    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
+    a = tf.compat.v1.placeholder("float", [None, ACTIONS])
+    y = tf.compat.v1.placeholder("float", [None])
+    readout_action = tf.reduce_sum(input_tensor=tf.multiply(readout, a), axis=1)
+    cost = tf.reduce_mean(input_tensor=tf.square(y - readout_action))
+    train_step = tf.compat.v1.train.AdamOptimizer(1e-6).minimize(cost)
 
     # open up a game state to communicate with emulator
     game_state = game.GameState()
@@ -102,14 +103,14 @@ def trainNetwork(s, readout, h_fc1, sess):
     s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
 
     # saving and loading networks
-    saver = tf.train.Saver()
-    sess.run(tf.initialize_all_variables())
+    saver = tf.compat.v1.train.Saver()
+    sess.run(tf.compat.v1.initialize_all_variables())
     checkpoint = tf.train.get_checkpoint_state("saved_networks")
-    if checkpoint and checkpoint.model_checkpoint_path:
-        saver.restore(sess, checkpoint.model_checkpoint_path)
-        print("Successfully loaded:", checkpoint.model_checkpoint_path)
-    else:
-        print("Could not find old network weights")
+    #if checkpoint and checkpoint.model_checkpoint_path:
+        #saver.restore(sess, checkpoint.model_checkpoint_path)
+        #print("Successfully loaded:", checkpoint.model_checkpoint_path)
+    #else:
+        #print("Could not find old network weights")
 
     # start training
     epsilon = INITIAL_EPSILON
@@ -204,11 +205,12 @@ def trainNetwork(s, readout, h_fc1, sess):
     '''
 
 def playGame():
-    sess = tf.InteractiveSession()
+    sess = tf.compat.v1.InteractiveSession()
     s, readout, h_fc1 = createNetwork()
     trainNetwork(s, readout, h_fc1, sess)
 
 def main():
+    tf.compat.v1.disable_eager_execution()
     playGame()
 
 if __name__ == "__main__":
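These deep_q_network.py changes keep the original graph-and-session code running on TensorFlow 2 by turning off eager execution and routing removed symbols through `tf.compat.v1`, rather than rewriting the model in Keras. As a standalone reference for that idiom, here is a minimal sketch; the tensor names and shapes are illustrative and not taken from the patch.

```python
import tensorflow as tf

# Run TF1-style graphs on TF2: disable eager mode, build a graph with
# compat.v1 placeholders, then execute it inside a session via feed_dict.
tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder("float", [None, 4])                   # illustrative input
w = tf.Variable(tf.random.truncated_normal([4, 2], stddev=0.01))   # same initializer style as the patch
y = tf.matmul(x, w)

with tf.compat.v1.Session() as sess:
    # The patch uses the older initialize_all_variables alias; this is the current name.
    sess.run(tf.compat.v1.global_variables_initializer())
    out = sess.run(y, feed_dict={x: [[1.0, 2.0, 3.0, 4.0]]})
    print(out.shape)  # (1, 2)
```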
diff --git a/game/wrapped_flappy_bird.py b/game/wrapped_flappy_bird.py
index 24a102e..f47140f 100644
--- a/game/wrapped_flappy_bird.py
+++ b/game/wrapped_flappy_bird.py
@@ -7,7 +7,7 @@
 from pygame.locals import *
 from itertools import cycle
 
-FPS = 30
+FPS = 4000
 SCREENWIDTH = 288
 SCREENHEIGHT = 512
@@ -139,7 +139,7 @@ def frame_step(self, input_actions):
             (self.playerx, self.playery))
 
         image_data = pygame.surfarray.array3d(pygame.display.get_surface())
-        pygame.display.update()
+        #pygame.display.update()
         FPSCLOCK.tick(FPS)
         #print self.upperPipes[0]['y'] + PIPE_HEIGHT - int(BASEY * 0.2)
         return image_data, reward, terminal
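On the game side, the patch trades rendering for speed: the frame cap goes from 30 to 4000 and `pygame.display.update()` is commented out, so training is no longer throttled by drawing to the screen while `pygame.surfarray.array3d` still returns the pixels the network consumes. A sketch of the same idea behind a switch instead of hard-coded edits might look like this; the `SHOW_GAME` flag is hypothetical and not part of the repository.

```python
import pygame

SHOW_GAME = False                  # hypothetical flag; the patch hard-codes this choice
FPS = 30 if SHOW_GAME else 4000    # 30 to watch the agent, effectively uncapped for training

pygame.init()
screen = pygame.display.set_mode((288, 512))
clock = pygame.time.Clock()

for _ in range(120):               # stand-in for the training loop's frame_step calls
    screen.fill((0, 0, 0))         # game drawing would happen here
    frame = pygame.surfarray.array3d(pygame.display.get_surface())  # pixels are available either way
    if SHOW_GAME:
        pygame.display.update()    # skipping the screen refresh is what buys the speed-up
    clock.tick(FPS)

pygame.quit()
```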
diff --git a/report.txt b/report.txt
new file mode 100644
index 0000000..820d9a9
--- /dev/null
+++ b/report.txt
@@ -0,0 +1,37 @@
+TensorFlow 2.0 Upgrade Script
+-----------------------------
+Converted 1 files
+Detected 1 issues that require attention
+--------------------------------------------------------------------------------
+--------------------------------------------------------------------------------
+File: deep_q_network.py
+--------------------------------------------------------------------------------
+deep_q_network.py:184:12: WARNING: *.save requires manual check. (This warning is only applicable if the code saves a tf.Keras model) Keras model.save now saves to the Tensorflow SavedModel format by default, instead of HDF5. To continue saving to HDF5, add the argument save_format='h5' to the save() function.
+================================================================================
+Detailed log follows:
+
+================================================================================
+--------------------------------------------------------------------------------
+Processing file 'deep_q_network.py'
+ outputting to 'deep_q_network_updated.py'
+--------------------------------------------------------------------------------
+
+25:14: INFO: Renamed 'tf.truncated_normal' to 'tf.random.truncated_normal'
+33:11: INFO: Added keywords to args of function 'tf.nn.conv2d'
+33:11: INFO: Renamed keyword argument for tf.nn.conv2d from filter to filters
+36:11: INFO: Added keywords to args of function 'tf.nn.max_pool'
+36:11: INFO: Renamed keyword argument for tf.nn.max_pool from value to input
+36:11: INFO: Renamed 'tf.nn.max_pool' to 'tf.nn.max_pool2d'
+56:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
+80:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
+81:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
+82:21: INFO: Added keywords to args of function 'tf.reduce_sum'
+82:21: INFO: Renamed keyword argument for tf.reduce_sum from reduction_indices to axis
+83:11: INFO: Added keywords to args of function 'tf.reduce_mean'
+84:17: INFO: Renamed 'tf.train.AdamOptimizer' to 'tf.compat.v1.train.AdamOptimizer'
+105:12: INFO: Renamed 'tf.train.Saver' to 'tf.compat.v1.train.Saver'
+106:13: INFO: Renamed 'tf.initialize_all_variables' to 'tf.compat.v1.initialize_all_variables'
+184:12: WARNING: *.save requires manual check. (This warning is only applicable if the code saves a tf.Keras model) Keras model.save now saves to the Tensorflow SavedModel format by default, instead of HDF5. To continue saving to HDF5, add the argument save_format='h5' to the save() function.
+207:11: INFO: Renamed 'tf.InteractiveSession' to 'tf.compat.v1.InteractiveSession'
+--------------------------------------------------------------------------------
+
diff --git a/saved_networks/bird-dqn-2880000 b/saved_networks/bird-dqn-2880000
deleted file mode 100644
index 4b1fe79..0000000
Binary files a/saved_networks/bird-dqn-2880000 and /dev/null differ
diff --git a/saved_networks/bird-dqn-2880000.meta b/saved_networks/bird-dqn-2880000.meta
deleted file mode 100644
index 281f036..0000000
Binary files a/saved_networks/bird-dqn-2880000.meta and /dev/null differ
diff --git a/saved_networks/bird-dqn-2890000 b/saved_networks/bird-dqn-2890000
deleted file mode 100644
index a911903..0000000
Binary files a/saved_networks/bird-dqn-2890000 and /dev/null differ
diff --git a/saved_networks/bird-dqn-2890000.meta b/saved_networks/bird-dqn-2890000.meta
deleted file mode 100644
index 281f036..0000000
Binary files a/saved_networks/bird-dqn-2890000.meta and /dev/null differ
diff --git a/saved_networks/bird-dqn-2900000 b/saved_networks/bird-dqn-2900000
deleted file mode 100644
index b1f9415..0000000
Binary files a/saved_networks/bird-dqn-2900000 and /dev/null differ
diff --git a/saved_networks/bird-dqn-2900000.meta b/saved_networks/bird-dqn-2900000.meta
deleted file mode 100644
index 281f036..0000000
Binary files a/saved_networks/bird-dqn-2900000.meta and /dev/null differ
diff --git a/saved_networks/bird-dqn-2910000 b/saved_networks/bird-dqn-2910000
deleted file mode 100644
index a68db5c..0000000
Binary files a/saved_networks/bird-dqn-2910000 and /dev/null differ
diff --git a/saved_networks/bird-dqn-2910000.meta b/saved_networks/bird-dqn-2910000.meta
deleted file mode 100644
index 281f036..0000000
Binary files a/saved_networks/bird-dqn-2910000.meta and /dev/null differ
diff --git a/saved_networks/bird-dqn-2920000 b/saved_networks/bird-dqn-2920000
deleted file mode 100644
index 1773682..0000000
Binary files a/saved_networks/bird-dqn-2920000 and /dev/null differ
diff --git a/saved_networks/bird-dqn-2920000.meta b/saved_networks/bird-dqn-2920000.meta
deleted file mode 100644
index 281f036..0000000
Binary files a/saved_networks/bird-dqn-2920000.meta and /dev/null differ
diff --git a/saved_networks/checkpoint b/saved_networks/checkpoint
deleted file mode 100644
index 9f6d6fb..0000000
--- a/saved_networks/checkpoint
+++ /dev/null
@@ -1,6 +0,0 @@
-model_checkpoint_path: "bird-dqn-2920000"
-all_model_checkpoint_paths: "bird-dqn-2880000"
-all_model_checkpoint_paths: "bird-dqn-2890000"
-all_model_checkpoint_paths: "bird-dqn-2900000"
-all_model_checkpoint_paths: "bird-dqn-2910000"
-all_model_checkpoint_paths: "bird-dqn-2920000"
diff --git a/saved_networks/pretrained_model/bird-dqn-policy b/saved_networks/pretrained_model/bird-dqn-policy
deleted file mode 100644
index 1773682..0000000
Binary files a/saved_networks/pretrained_model/bird-dqn-policy and /dev/null differ
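The new report.txt is the log written by TensorFlow's automatic upgrade tool (presumably an invocation along the lines of `tf_upgrade_v2 --infile deep_q_network.py --outfile deep_q_network_updated.py`; the exact command is not recorded in the patch). Its single warning, at line 184, flags a `*.save` call, but since this script checkpoints through `tf.compat.v1.train.Saver` rather than a Keras model, the Keras `save_format` advice most likely does not apply. For completeness, here is a minimal sketch of Saver-based checkpointing in the same style; the `saved_networks/bird-dqn` prefix mirrors the files deleted above, while the save interval and the variable being saved are assumptions.

```python
import os
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# A placeholder variable so the Saver has something to checkpoint.
w = tf.Variable(tf.zeros([4, 2]), name="w")

saver = tf.compat.v1.train.Saver()
os.makedirs("saved_networks", exist_ok=True)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())

    # Periodic saves produce files like saved_networks/bird-dqn-10000,
    # analogous to the bird-dqn-28xxxxx/29xxxxx checkpoints removed by this diff.
    for t in range(1, 30001):
        if t % 10000 == 0:
            saver.save(sess, "saved_networks/bird-dqn", global_step=t)

    # Restoring mirrors the block that the README's "How to test?" step asks you to uncomment.
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")
```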