Binary file added .DS_Store
4 changes: 4 additions & 0 deletions .gitignore
@@ -1,3 +1,7 @@
# ignore all pyc files.
*.pyc

.DS_Store
.DS_Store
.DS_Store
.DS_Store
20 changes: 5 additions & 15 deletions README.md
@@ -2,30 +2,26 @@

<img src="./images/flappy_bird_demp.gif" width="250">

7-minute version: [DQN for flappy bird](https://www.youtube.com/watch?v=THhUXIhjkCM)

## Overview
This project implements the Deep Q-Learning algorithm described in Playing Atari with Deep Reinforcement Learning [2] and shows that the same algorithm generalizes to the notoriously difficult Flappy Bird.

## Installation Dependencies:
* Python 3
* TensorFlow 0.7
* TensorFlow 2
* pygame
* OpenCV-Python

## How to Run?
```
git clone https://github.com/yenchenlin1994/DeepLearningFlappyBird.git
git clone https://github.com/Anonymous-Ol/DeepLearningFlappyBird-1.git
cd DeepLearningFlappyBird-1
python deep_q_network.py
```

## What is Deep Q-Network?
It is a convolutional neural network, trained with a variant of Q-learning, whose input is raw pixels and whose output is a value function estimating future rewards.

For those who are interested in deep reinforcement learning, I highly recommend reading the following post:

[Demystifying Deep Reinforcement Learning](http://www.nervanasys.com/demystifying-deep-reinforcement-learning/)
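
In plain terms, the network maps a stack of game frames to one Q-value per action; the agent acts (mostly) greedily on those values and trains them toward the one-step Bellman target. A minimal NumPy sketch of those two ideas (illustrative only, not the network code in this repository):

```python
import numpy as np

def epsilon_greedy(q_values, epsilon):
    # With probability epsilon explore with a random action,
    # otherwise exploit the current Q-value estimates.
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))

def bellman_target(reward, q_next, terminal, gamma=0.99):
    # Terminal transitions contribute only the immediate reward;
    # otherwise bootstrap with the discounted best next-state value.
    if terminal:
        return reward
    return reward + gamma * float(np.max(q_next))
```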

## Deep Q-Network Algorithm

@@ -88,16 +84,10 @@ Change [first line of `saved_networks/checkpoint`](https://github.com/yenchenlin

`model_checkpoint_path: "saved_networks/bird-dqn-2920000"`

#### How to reproduce?
1. Comment out [these lines](https://github.com/yenchenlin1994/DeepLearningFlappyBird/blob/master/deep_q_network.py#L108-L112)
#### How to test?
1. Uncomment [these lines](https://github.com/yenchenlin1994/DeepLearningFlappyBird/blob/master/deep_q_network.py#L108-L112)
2. You may want to decrease `EXPLORE` and epsilon, as sketched below.

2. Modify the parameters in `deep_q_network.py` as follows:
```python
OBSERVE = 10000
EXPLORE = 3000000
FINAL_EPSILON = 0.0001
INITIAL_EPSILON = 0.1
```
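
For evaluation with a trained network, "decrease explore and epsilon" could, for example, mean something like the following (illustrative values only, not settings shipped with this repository):

```python
OBSERVE = 100000.         # training does not start until 100k frames, so a short demo never updates the loaded weights
EXPLORE = 2000000.
FINAL_EPSILON = 0.0001    # act greedily almost all the time
INITIAL_EPSILON = 0.0001  # no initial exploration burst
```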

## References

40 changes: 21 additions & 19 deletions deep_q_network.py
100755 → 100644
@@ -14,26 +14,27 @@
ACTIONS = 2 # number of valid actions
GAMMA = 0.99 # discount factor for future rewards
OBSERVE = 100000. # timesteps to observe before training
EXPLORE = 2000000. # frames over which to anneal epsilon
EXPLORE = 3000000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.0001 # final value of epsilon
INITIAL_EPSILON = 0.0001 # starting value of epsilon
INITIAL_EPSILON = 0.1 # starting value of epsilon
REPLAY_MEMORY = 50000 # number of previous transitions to remember
BATCH = 32 # size of minibatch
FRAME_PER_ACTION = 1
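# During training epsilon is annealed linearly from INITIAL_EPSILON down to
# FINAL_EPSILON over the EXPLORE frames that follow the OBSERVE phase.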


def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev = 0.01)
initial = tf.random.truncated_normal(shape, stddev = 0.01)
return tf.Variable(initial)

def bias_variable(shape):
initial = tf.constant(0.01, shape = shape)
return tf.Variable(initial)

def conv2d(x, W, stride):
return tf.nn.conv2d(x, W, strides = [1, stride, stride, 1], padding = "SAME")
return tf.nn.conv2d(input=x, filters=W, strides = [1, stride, stride, 1], padding = "SAME")

def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")
return tf.nn.max_pool2d(input=x, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = "SAME")

def createNetwork():
# network weights
@@ -53,7 +54,7 @@ def createNetwork():
b_fc2 = bias_variable([ACTIONS])

# input layer
s = tf.placeholder("float", [None, 80, 80, 4])
s = tf.compat.v1.placeholder("float", [None, 80, 80, 4])

# hidden layers
h_conv1 = tf.nn.relu(conv2d(s, W_conv1, 4) + b_conv1)
@@ -77,11 +78,11 @@

def trainNetwork(s, readout, h_fc1, sess):
# define the cost function
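# a is fed as a one-hot mask over the actions; multiplying it with the network
# output and summing over the action axis extracts Q(s, a_taken), which is then
# regressed toward the Bellman target supplied through the placeholder y.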
a = tf.placeholder("float", [None, ACTIONS])
y = tf.placeholder("float", [None])
readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices=1)
cost = tf.reduce_mean(tf.square(y - readout_action))
train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)
a = tf.compat.v1.placeholder("float", [None, ACTIONS])
y = tf.compat.v1.placeholder("float", [None])
readout_action = tf.reduce_sum(input_tensor=tf.multiply(readout, a), axis=1)
cost = tf.reduce_mean(input_tensor=tf.square(y - readout_action))
train_step = tf.compat.v1.train.AdamOptimizer(1e-6).minimize(cost)

# open up a game state to communicate with emulator
game_state = game.GameState()
@@ -102,14 +103,14 @@ def trainNetwork(s, readout, h_fc1, sess):
s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

# saving and loading networks
saver = tf.train.Saver()
sess.run(tf.initialize_all_variables())
saver = tf.compat.v1.train.Saver()
sess.run(tf.compat.v1.initialize_all_variables())
checkpoint = tf.train.get_checkpoint_state("saved_networks")
if checkpoint and checkpoint.model_checkpoint_path:
saver.restore(sess, checkpoint.model_checkpoint_path)
print("Successfully loaded:", checkpoint.model_checkpoint_path)
else:
print("Could not find old network weights")
#if checkpoint and checkpoint.model_checkpoint_path:
#saver.restore(sess, checkpoint.model_checkpoint_path)
#print("Successfully loaded:", checkpoint.model_checkpoint_path)
#else:
#print("Could not find old network weights")

# start training
epsilon = INITIAL_EPSILON
@@ -204,11 +205,12 @@ def trainNetwork(s, readout, h_fc1, sess):
'''

def playGame():
sess = tf.InteractiveSession()
sess = tf.compat.v1.InteractiveSession()
s, readout, h_fc1 = createNetwork()
trainNetwork(s, readout, h_fc1, sess)

def main():
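# The graph-mode API used above (placeholders, Session, Saver) only works in TF2
# once eager execution has been disabled.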
tf.compat.v1.disable_eager_execution()
playGame()

if __name__ == "__main__":
4 changes: 2 additions & 2 deletions game/wrapped_flappy_bird.py
@@ -7,7 +7,7 @@
from pygame.locals import *
from itertools import cycle

FPS = 30
FPS = 4000
SCREENWIDTH = 288
SCREENHEIGHT = 512

@@ -139,7 +139,7 @@ def frame_step(self, input_actions):
(self.playerx, self.playery))

image_data = pygame.surfarray.array3d(pygame.display.get_surface())
pygame.display.update()
#pygame.display.update()
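# Skipping the on-screen update (together with the raised FPS cap) lets the emulator
# generate frames as fast as possible during training; re-enable it to watch the game.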
FPSCLOCK.tick(FPS)
#print self.upperPipes[0]['y'] + PIPE_HEIGHT - int(BASEY * 0.2)
return image_data, reward, terminal
37 changes: 37 additions & 0 deletions report.txt
@@ -0,0 +1,37 @@
TensorFlow 2.0 Upgrade Script
-----------------------------
Converted 1 files
Detected 1 issues that require attention
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
File: deep_q_network.py
--------------------------------------------------------------------------------
deep_q_network.py:184:12: WARNING: *.save requires manual check. (This warning is only applicable if the code saves a tf.Keras model) Keras model.save now saves to the Tensorflow SavedModel format by default, instead of HDF5. To continue saving to HDF5, add the argument save_format='h5' to the save() function.
================================================================================
Detailed log follows:

================================================================================
--------------------------------------------------------------------------------
Processing file 'deep_q_network.py'
outputting to 'deep_q_network_updated.py'
--------------------------------------------------------------------------------

25:14: INFO: Renamed 'tf.truncated_normal' to 'tf.random.truncated_normal'
33:11: INFO: Added keywords to args of function 'tf.nn.conv2d'
33:11: INFO: Renamed keyword argument for tf.nn.conv2d from filter to filters
36:11: INFO: Added keywords to args of function 'tf.nn.max_pool'
36:11: INFO: Renamed keyword argument for tf.nn.max_pool from value to input
36:11: INFO: Renamed 'tf.nn.max_pool' to 'tf.nn.max_pool2d'
56:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
80:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
81:8: INFO: Renamed 'tf.placeholder' to 'tf.compat.v1.placeholder'
82:21: INFO: Added keywords to args of function 'tf.reduce_sum'
82:21: INFO: Renamed keyword argument for tf.reduce_sum from reduction_indices to axis
83:11: INFO: Added keywords to args of function 'tf.reduce_mean'
84:17: INFO: Renamed 'tf.train.AdamOptimizer' to 'tf.compat.v1.train.AdamOptimizer'
105:12: INFO: Renamed 'tf.train.Saver' to 'tf.compat.v1.train.Saver'
106:13: INFO: Renamed 'tf.initialize_all_variables' to 'tf.compat.v1.initialize_all_variables'
184:12: WARNING: *.save requires manual check. (This warning is only applicable if the code saves a tf.Keras model) Keras model.save now saves to the Tensorflow SavedModel format by default, instead of HDF5. To continue saving to HDF5, add the argument save_format='h5' to the save() function.
207:11: INFO: Renamed 'tf.InteractiveSession' to 'tf.compat.v1.InteractiveSession'
--------------------------------------------------------------------------------
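
This report is the standard output of TensorFlow's `tf_upgrade_v2` conversion script. Assuming the file names that appear in the log above, the invocation would have looked roughly like:

```
tf_upgrade_v2 --infile deep_q_network.py --outfile deep_q_network_updated.py --reportfile report.txt
```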

Binary file removed saved_networks/bird-dqn-2880000
Binary file removed saved_networks/bird-dqn-2880000.meta
Binary file removed saved_networks/bird-dqn-2890000
Binary file removed saved_networks/bird-dqn-2890000.meta
Binary file removed saved_networks/bird-dqn-2900000
Binary file removed saved_networks/bird-dqn-2900000.meta
Binary file removed saved_networks/bird-dqn-2910000
Binary file removed saved_networks/bird-dqn-2910000.meta
Binary file removed saved_networks/bird-dqn-2920000
Binary file removed saved_networks/bird-dqn-2920000.meta
6 changes: 0 additions & 6 deletions saved_networks/checkpoint

This file was deleted.

Binary file removed saved_networks/pretrained_model/bird-dqn-policy