diff --git a/FlappyAgent.py b/FlappyAgent.py
new file mode 100644
index 0000000..65619eb
--- /dev/null
+++ b/FlappyAgent.py
@@ -0,0 +1,40 @@
+# Import the libraries
+import numpy as np
+from keras.models import load_model
+from collections import deque
+from skimage.color import rgb2gray
+from skimage.transform import resize
+
+# Trained Deep Q-Network and the frame-stacking state shared across calls
+dqn = load_model('dqn_3.h5')
+frame_count = 0
+stacked_x = []
+
+def process_screen(x):
+    # crop the playing area, convert to grayscale and resize to 80x80
+    x = x[50:270, :320]
+    return 256*resize(rgb2gray(x), (80, 80))
+
+def fill_stack(screen):
+    # on the first frame, fill the 4-frame stack with copies of the current screen
+    return deque([screen, screen, screen, screen], maxlen=4)
+
+def FlappyPolicy(state, screen):
+    global stacked_x
+    global frame_count
+    global dqn
+
+    moves = [1, 0]  # index 0 -> flap, index 1 -> do nothing
+    frame_count = frame_count + 1
+    screen_x = process_screen(screen)
+
+    if frame_count == 1:
+        stacked_x = fill_stack(screen_x)
+    else:
+        stacked_x.append(screen_x)
+    x = np.stack(stacked_x, axis=-1)
+
+    # greedy action; 119 is the "flap" key code in PLE's FlappyBird action set
+    QX = dqn.predict(np.array([x]))
+    action = moves[np.argmax(QX)]*119
+    return action
\ No newline at end of file
diff --git a/Mytrain.ipynb b/Mytrain.ipynb
new file mode 100644
index 0000000..760e54d
--- /dev/null
+++ b/Mytrain.ipynb
@@ -0,0 +1,318 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\RASSB\\Anaconda3\\envs\\tensorflowgpu\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n",
+      "Using TensorFlow backend.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "couldn't import doomish\n",
+      "Couldn't import doom\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Import the libraries\n",
+    "import numpy as np\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Conv2D, Flatten\n",
+    "from keras import optimizers\n",
+    "from keras.models import load_model\n",
+    "from collections import deque\n",
+    "from skimage.color import rgb2gray\n",
+    "from skimage.transform import resize\n",
+    "from ple import PLE\n",
+    "from ple.games.flappybird import FlappyBird\n",
+    "import matplotlib.pyplot as plt\n",
+    "import timeit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Reward shaping and helper functions\n",
+    "def clip_reward(rew):\n",
+    "    # clipped reward: +1 for a positive game reward, -3 for a negative one (death)\n",
+    "    rr = 0\n",
+    "    if rew > 0:\n",
+    "        rr = 1\n",
+    "    if rew < 0:\n",
+    "        rr = -3\n",
+    "    return rr\n",
+    "\n",
+    "def greedy_action(convnet, x):\n",
+    "    # greedy action according to the network's Q-values\n",
+    "    QX = convnet.predict(np.array([x]))\n",
+    "    return np.argmax(QX)\n",
+    "\n",
+    "def process_screen(x):\n",
+    "    # crop the playing area, convert to grayscale and resize to 80x80\n",
+    "    x = x[50:270, :320]\n",
+    "    return 256*resize(rgb2gray(x), (80, 80))\n",
+    "\n",
+    "def epsilon(step):\n",
+    "    # exploration rate: decays linearly from 1.0 to 0.1 over the first 1e6 steps, then 0.0001\n",
+    "    if step < 1e6:\n",
+    "        return 1. - step*9e-7\n",
+    "    return .0001\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# A class for the replay memory\n",
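+    "# Transitions are stored as single 80x80 uint8 frames; 4-frame input stacks are rebuilt when sampling.\n",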
+    "class MemoryBuffer:\n",
+    "    def __init__(self, length, screen_shape, action_shape):\n",
+    "        self.length = length\n",
+    "        self.screen_shape = screen_shape\n",
+    "        self.action_shape = action_shape\n",
+    "        shape = (length,) + screen_shape\n",
+    "        self.screens_x = np.zeros(shape, dtype=np.uint8) # starting states\n",
+    "        self.screens_y = np.zeros(shape, dtype=np.uint8) # resulting states\n",
+    "        shape = (length,) + action_shape\n",
+    "        self.actions = np.zeros(shape, dtype=np.uint8) # actions\n",
+    "        self.rewards = np.zeros((length,1), dtype=np.int8) # rewards\n",
+    "        self.terminals = np.zeros((length,1), dtype=np.bool) # true if resulting state is terminal\n",
+    "        self.terminals[-1] = True\n",
+    "        self.index = 0 # points one position past the last inserted element\n",
+    "        self.size = 0 # current size of the buffer\n",
+    "\n",
+    "    def append(self, screenx, a, r, screeny, d):\n",
+    "        self.screens_x[self.index] = screenx\n",
+    "        self.actions[self.index] = a\n",
+    "        self.rewards[self.index] = r\n",
+    "        self.screens_y[self.index] = screeny\n",
+    "        self.terminals[self.index] = d\n",
+    "        self.index = (self.index+1) % self.length\n",
+    "        self.size = np.min([self.size+1, self.length])\n",
+    "\n",
+    "    def stacked_frames_x(self, index):\n",
+    "        # rebuild the 4-frame starting-state stack, walking backwards until an episode boundary\n",
+    "        im_deque = deque(maxlen=4)\n",
+    "        pos = index % self.length\n",
+    "        for i in range(4):\n",
+    "            im = self.screens_x[pos]\n",
+    "            im_deque.appendleft(im)\n",
+    "            test_pos = (pos-1) % self.length\n",
+    "            if self.terminals[test_pos] == False:\n",
+    "                pos = test_pos\n",
+    "        return np.stack(im_deque, axis=-1)\n",
+    "\n",
+    "    def stacked_frames_y(self, index):\n",
+    "        # rebuild the 4-frame resulting-state stack, walking backwards until an episode boundary\n",
+    "        im_deque = deque(maxlen=4)\n",
+    "        pos = index % self.length\n",
+    "        for i in range(4):\n",
+    "            im = self.screens_y[pos]\n",
+    "            im_deque.appendleft(im)\n",
+    "            test_pos = (pos-1) % self.length\n",
+    "            if self.terminals[test_pos] == False:\n",
+    "                pos = test_pos\n",
+    "        return np.stack(im_deque, axis=-1)\n",
+    "\n",
+    "    def minibatch(self, size):\n",
+    "        indices = np.random.choice(self.size, size=size, replace=False)\n",
+    "        x = np.zeros((size,)+self.screen_shape+(4,))\n",
+    "        y = np.zeros((size,)+self.screen_shape+(4,))\n",
+    "        for i in range(size):\n",
+    "            x[i] = self.stacked_frames_x(indices[i])\n",
+    "            y[i] = self.stacked_frames_y(indices[i])\n",
+    "        return x, self.actions[indices], self.rewards[indices], y, self.terminals[indices]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "# Creating the model (run once, then load it from disk below)\n",
+    "\n",
+    "# Convolutional model in Keras\n",
+    "dqn = Sequential()\n",
+    "# 1st layer\n",
+    "dqn.add(Conv2D(filters=16, kernel_size=(8,8), strides=4, activation=\"relu\", input_shape=(80, 80, 4)))\n",
+    "# 2nd layer\n",
+    "dqn.add(Conv2D(filters=32, kernel_size=(4,4), strides=2, activation=\"relu\"))\n",
+    "dqn.add(Flatten())\n",
+    "# 3rd layer\n",
+    "dqn.add(Dense(units=256, activation=\"relu\"))\n",
+    "# output layer: one Q-value per action\n",
+    "dqn.add(Dense(units=2, activation=\"linear\"))\n",
+    "\n",
+    "adam = optimizers.Adam(lr=1e-4)\n",
+    "dqn.compile(loss=\"mean_squared_error\", optimizer=adam)\n",
+    "dqn.save('dqn_3.h5')\n",
+    "dqn_target = load_model('dqn_3.h5')\n",
+    "'''\n",
+    "\n",
+    "# Load the Keras network and its target copy\n",
+    "dqn = load_model('dqn_3.h5')\n",
+    "adam = optimizers.Adam(lr=1e-4)\n",
+    "dqn.compile(loss=\"mean_squared_error\", optimizer=adam)\n",
+    "dqn.save('dqn_3.h5')\n",
+    "dqn_target = load_model('dqn_3.h5')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-time training parameters (run this cell only once per session)\n",
+    "replay_memory_size = 10000\n",
+    "replay_memory = MemoryBuffer(replay_memory_size, (80, 80), (1,))\n",
+    "step = 0\n",
+    "w_transfer = 5000\n",
+    "mini_batch_size = 32\n",
+    "gamma = 0.99"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
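+    "# DQN training loop: choose actions (epsilon-greedy when test=1, greedy otherwise), store\n",
+    "# transitions in the replay memory, train on random minibatches and periodically sync the target network.\n",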
+    "test = 0  # 0 -> act greedily only; set to 1 to enable epsilon-greedy exploration\n",
+    "nb_games = 5  # number of games to play\n",
+    "\n",
+    "# Flappy Bird configuration\n",
+    "game = FlappyBird(graphics=\"fixed\") # use \"fancy\" for full background, random bird color and random pipe color, use \"fixed\" (default) for black background and constant bird and pipe colors.\n",
+    "p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)\n",
+    "# start the game\n",
+    "p.init()\n",
+    "p.reset_game()\n",
+    "# get the possible actions for the player\n",
+    "actions = p.getActionSet()\n",
+    "# process the screen\n",
+    "screen_x = process_screen(p.getScreenRGB())\n",
+    "# initialise and fill the 4-frame stack\n",
+    "stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)\n",
+    "x = np.stack(stacked_x, axis=-1)\n",
+    "start = timeit.default_timer()\n",
+    "# score accumulators (raw game reward and clipped reward)\n",
+    "cumulated = np.zeros((nb_games))\n",
+    "cumulated_art = np.zeros((nb_games))\n",
+    "# main loop\n",
+    "for i in range(nb_games):\n",
+    "    p.reset_game()\n",
+    "    screen_x = process_screen(p.getScreenRGB())\n",
+    "    stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)\n",
+    "    x = np.stack(stacked_x, axis=-1)\n",
+    "    while(not p.game_over()): # while alive\n",
+    "        step = step + 1 # global step counter\n",
+    "        # show the number of games played\n",
+    "        if ((i+1) % 100 == 0):\n",
+    "            print('Game', i+1)\n",
+    "\n",
+    "        # action selection\n",
+    "        if np.random.rand() < test*epsilon(step):\n",
+    "            a = np.random.randint(2)\n",
+    "        else:\n",
+    "            a = greedy_action(dqn, x)\n",
+    "        # game reward\n",
+    "        reward = p.act(actions[a])\n",
+    "        # clipped reward\n",
+    "        r = clip_reward(reward)\n",
+    "        # next screen\n",
+    "        screen_y = process_screen(p.getScreenRGB())\n",
+    "        d = p.game_over() # is the resulting state terminal?\n",
+    "        replay_memory.append(screen_x, a, r, screen_y, d)\n",
+    "        # train\n",
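+    "        # sample a random minibatch and take one Q-learning gradient step\n",
+    "        # (runs every frame once more than mini_batch_size transitions have been collected)\n",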
+    "        if step > mini_batch_size:\n",
+    "            X, A, R, Y, D = replay_memory.minibatch(mini_batch_size)\n",
+    "            QY = dqn_target.predict(Y)\n",
+    "            QYmax = QY.max(1).reshape((mini_batch_size, 1))\n",
+    "            update = R + gamma * (1-D) * QYmax\n",
+    "            QX = dqn.predict(X)\n",
+    "            QX[np.arange(mini_batch_size), A.ravel()] = update.ravel()\n",
+    "            dqn.train_on_batch(x=X, y=QX)\n",
+    "\n",
+    "        # transfer weights to the target network (via save/load) every w_transfer steps\n",
+    "        if step > 1 and step % w_transfer == 0:\n",
+    "            print('saving')\n",
+    "            dqn.save('dqn_3.h5')\n",
+    "            print('Saving done')\n",
+    "            dqn_target = load_model('dqn_3.h5')\n",
+    "        # scores\n",
+    "        cumulated[i] = cumulated[i] + reward\n",
+    "        cumulated_art[i] = cumulated_art[i] + r\n",
+    "        # keep going\n",
+    "        screen_x = screen_y\n",
+    "        stacked_x.append(screen_x)\n",
+    "        x = np.stack(stacked_x, axis=-1)\n",
+    "\n",
+    "# total time played\n",
+    "stop = timeit.default_timer()\n",
+    "temps = stop - start\n",
+    "\n",
+    "print('time elapsed [s]', temps)\n",
+    "\n",
+    "print('saving')\n",
+    "dqn.save('dqn_3.h5')\n",
+    "print('Saving done')\n",
+    "print('done ^^')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/dqn_3.h5 b/dqn_3.h5
new file mode 100644
index 0000000..2547060
Binary files /dev/null and b/dqn_3.h5 differ
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..333e30e
--- /dev/null
+++ b/run.py
@@ -0,0 +1,32 @@
+# You're not allowed to change this file
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+from FlappyAgent import FlappyPolicy
+
+game = FlappyBird()
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)
+# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.
+
+p.init()
+reward = 0.0
+
+nb_games = 5
+cumulated = np.zeros((nb_games))
+
+for i in range(nb_games):
+    p.reset_game()
+
+    while(not p.game_over()):
+        state = game.getGameState()
+        screen = p.getScreenRGB()
+        action = FlappyPolicy(state, screen) ### Your job is to define this function.
+        #print(action)
+        reward = p.act(action)
+        cumulated[i] = cumulated[i] + reward
+
+average_score = np.mean(cumulated)
+max_score = np.max(cumulated)
+
+print('avg:', average_score)
+print('max:', max_score)
\ No newline at end of file