diff --git a/FlappyAgent.py b/FlappyAgent.py
new file mode 100644
index 0000000..65619eb
--- /dev/null
+++ b/FlappyAgent.py
@@ -0,0 +1,40 @@
+# Import the libraries
+import numpy as np
+from keras.models import load_model
+from collections import deque
+from skimage.color import rgb2gray
+from skimage.transform import resize
+
+# Trained Deep Q-Network and the frame-stacking state shared across calls
+dqn = load_model('dqn_3.h5')
+frame_count = 0
+stacked_x = []
+
+def process_screen(x):
+    # crop the playing area, convert to grayscale and resize to 80x80
+    x = x[50:270, :320]
+    return 256*resize(rgb2gray(x), (80, 80))
+
+def fill_stack(screen):
+    # on the first frame, fill the 4-frame stack with copies of the current screen
+    return deque([screen, screen, screen, screen], maxlen=4)
+
+def FlappyPolicy(state, screen):
+    global stacked_x
+    global frame_count
+    global dqn
+
+    moves = [1, 0]  # index 0 -> flap, index 1 -> do nothing
+    frame_count = frame_count + 1
+    screen_x = process_screen(screen)
+
+    if frame_count == 1:
+        stacked_x = fill_stack(screen_x)
+    else:
+        stacked_x.append(screen_x)
+    x = np.stack(stacked_x, axis=-1)
+
+    # greedy action; 119 is the "flap" key code in PLE's FlappyBird action set
+    QX = dqn.predict(np.array([x]))
+    action = moves[np.argmax(QX)]*119
+    return action
\ No newline at end of file
diff --git a/Mytrain.ipynb b/Mytrain.ipynb
new file mode 100644
index 0000000..760e54d
--- /dev/null
+++ b/Mytrain.ipynb
@@ -0,0 +1,318 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\RASSB\\Anaconda3\\envs\\tensorflowgpu\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n",
+      "Using TensorFlow backend.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "couldn't import doomish\n",
+      "Couldn't import doom\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Import the libraries\n",
+    "import numpy as np\n",
+    "from keras.models import Sequential\n",
+    "from keras.layers import Dense, Conv2D, Flatten\n",
+    "from keras import optimizers\n",
+    "from keras.models import load_model\n",
+    "from collections import deque\n",
+    "from skimage.color import rgb2gray\n",
+    "from skimage.transform import resize\n",
+    "from ple import PLE\n",
+    "from ple.games.flappybird import FlappyBird\n",
+    "import matplotlib.pyplot as plt\n",
+    "import timeit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Reward shaping and helper functions\n",
+    "def clip_reward(rew):\n",
+    "    # clipped reward: +1 for a positive game reward, -3 for a negative one (death)\n",
+    "    rr = 0\n",
+    "    if rew > 0:\n",
+    "        rr = 1\n",
+    "    if rew < 0:\n",
+    "        rr = -3\n",
+    "    return rr\n",
+    "\n",
+    "def greedy_action(convnet, x):\n",
+    "    # greedy action according to the network's Q-values\n",
+    "    QX = convnet.predict(np.array([x]))\n",
+    "    return np.argmax(QX)\n",
+    "\n",
+    "def process_screen(x):\n",
+    "    # crop the playing area, convert to grayscale and resize to 80x80\n",
+    "    x = x[50:270, :320]\n",
+    "    return 256*resize(rgb2gray(x), (80, 80))\n",
+    "\n",
+    "def epsilon(step):\n",
+    "    # exploration rate: decays linearly from 1.0 to 0.1 over the first 1e6 steps, then 0.0001\n",
+    "    if step < 1e6:\n",
+    "        return 1. - step*9e-7\n",
+    "    return .0001\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# A class for the replay memory\n",
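+    "# Transitions are stored as single 80x80 uint8 frames; 4-frame input stacks are rebuilt when sampling.\n",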
+    "class MemoryBuffer:\n",
+    "    def __init__(self, length, screen_shape, action_shape):\n",
+    "        self.length = length\n",
+    "        self.screen_shape = screen_shape\n",
+    "        self.action_shape = action_shape\n",
+    "        shape = (length,) + screen_shape\n",
+    "        self.screens_x = np.zeros(shape, dtype=np.uint8) # starting states\n",
+    "        self.screens_y = np.zeros(shape, dtype=np.uint8) # resulting states\n",
+    "        shape = (length,) + action_shape\n",
+    "        self.actions = np.zeros(shape, dtype=np.uint8) # actions\n",
+    "        self.rewards = np.zeros((length,1), dtype=np.int8) # rewards\n",
+    "        self.terminals = np.zeros((length,1), dtype=np.bool) # true if resulting state is terminal\n",
+    "        self.terminals[-1] = True\n",
+    "        self.index = 0 # points one position past the last inserted element\n",
+    "        self.size = 0 # current size of the buffer\n",
+    "\n",
+    "    def append(self, screenx, a, r, screeny, d):\n",
+    "        self.screens_x[self.index] = screenx\n",
+    "        self.actions[self.index] = a\n",
+    "        self.rewards[self.index] = r\n",
+    "        self.screens_y[self.index] = screeny\n",
+    "        self.terminals[self.index] = d\n",
+    "        self.index = (self.index+1) % self.length\n",
+    "        self.size = np.min([self.size+1, self.length])\n",
+    "\n",
+    "    def stacked_frames_x(self, index):\n",
+    "        # rebuild the 4-frame starting-state stack, walking backwards until an episode boundary\n",
+    "        im_deque = deque(maxlen=4)\n",
+    "        pos = index % self.length\n",
+    "        for i in range(4):\n",
+    "            im = self.screens_x[pos]\n",
+    "            im_deque.appendleft(im)\n",
+    "            test_pos = (pos-1) % self.length\n",
+    "            if self.terminals[test_pos] == False:\n",
+    "                pos = test_pos\n",
+    "        return np.stack(im_deque, axis=-1)\n",
+    "\n",
+    "    def stacked_frames_y(self, index):\n",
+    "        # rebuild the 4-frame resulting-state stack, walking backwards until an episode boundary\n",
+    "        im_deque = deque(maxlen=4)\n",
+    "        pos = index % self.length\n",
+    "        for i in range(4):\n",
+    "            im = self.screens_y[pos]\n",
+    "            im_deque.appendleft(im)\n",
+    "            test_pos = (pos-1) % self.length\n",
+    "            if self.terminals[test_pos] == False:\n",
+    "                pos = test_pos\n",
+    "        return np.stack(im_deque, axis=-1)\n",
+    "\n",
+    "    def minibatch(self, size):\n",
+    "        indices = np.random.choice(self.size, size=size, replace=False)\n",
+    "        x = np.zeros((size,)+self.screen_shape+(4,))\n",
+    "        y = np.zeros((size,)+self.screen_shape+(4,))\n",
+    "        for i in range(size):\n",
+    "            x[i] = self.stacked_frames_x(indices[i])\n",
+    "            y[i] = self.stacked_frames_y(indices[i])\n",
+    "        return x, self.actions[indices], self.rewards[indices], y, self.terminals[indices]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "# Creating the model (run once, then load it from disk below)\n",
+    "\n",
+    "# Convolutional model in Keras\n",
+    "dqn = Sequential()\n",
+    "# 1st layer\n",
+    "dqn.add(Conv2D(filters=16, kernel_size=(8,8), strides=4, activation=\"relu\", input_shape=(80, 80, 4)))\n",
+    "# 2nd layer\n",
+    "dqn.add(Conv2D(filters=32, kernel_size=(4,4), strides=2, activation=\"relu\"))\n",
+    "dqn.add(Flatten())\n",
+    "# 3rd layer\n",
+    "dqn.add(Dense(units=256, activation=\"relu\"))\n",
+    "# output layer: one Q-value per action\n",
+    "dqn.add(Dense(units=2, activation=\"linear\"))\n",
+    "\n",
+    "adam = optimizers.Adam(lr=1e-4)\n",
+    "dqn.compile(loss=\"mean_squared_error\", optimizer=adam)\n",
+    "dqn.save('dqn_3.h5')\n",
+    "dqn_target = load_model('dqn_3.h5')\n",
+    "'''\n",
+    "\n",
+    "# Load the Keras network and its target copy\n",
+    "dqn = load_model('dqn_3.h5')\n",
+    "adam = optimizers.Adam(lr=1e-4)\n",
+    "dqn.compile(loss=\"mean_squared_error\", optimizer=adam)\n",
+    "dqn.save('dqn_3.h5')\n",
+    "dqn_target = load_model('dqn_3.h5')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One-time training parameters (run this cell only once per session)\n",
+    "replay_memory_size = 10000\n",
+    "replay_memory = MemoryBuffer(replay_memory_size, (80, 80), (1,))\n",
+    "step = 0\n",
+    "w_transfer = 5000\n",
+    "mini_batch_size = 32\n",
+    "gamma = 0.99"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
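+    "# DQN training loop: choose actions (epsilon-greedy when test=1, greedy otherwise), store\n",
+    "# transitions in the replay memory, train on random minibatches and periodically sync the target network.\n",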
+    "test = 0  # 0 -> act greedily only; set to 1 to enable epsilon-greedy exploration\n",
+    "nb_games = 5  # number of games to play\n",
+    "\n",
+    "# Flappy Bird configuration\n",
+    "game = FlappyBird(graphics=\"fixed\") # use \"fancy\" for full background, random bird color and random pipe color, use \"fixed\" (default) for black background and constant bird and pipe colors.\n",
+    "p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)\n",
+    "# start the game\n",
+    "p.init()\n",
+    "p.reset_game()\n",
+    "# get the possible actions for the player\n",
+    "actions = p.getActionSet()\n",
+    "# process the screen\n",
+    "screen_x = process_screen(p.getScreenRGB())\n",
+    "# initialise and fill the 4-frame stack\n",
+    "stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)\n",
+    "x = np.stack(stacked_x, axis=-1)\n",
+    "start = timeit.default_timer()\n",
+    "# score accumulators (raw game reward and clipped reward)\n",
+    "cumulated = np.zeros((nb_games))\n",
+    "cumulated_art = np.zeros((nb_games))\n",
+    "# main loop\n",
+    "for i in range(nb_games):\n",
+    "    p.reset_game()\n",
+    "    screen_x = process_screen(p.getScreenRGB())\n",
+    "    stacked_x = deque([screen_x, screen_x, screen_x, screen_x], maxlen=4)\n",
+    "    x = np.stack(stacked_x, axis=-1)\n",
+    "    while(not p.game_over()): # while alive\n",
+    "        step = step + 1 # global step counter\n",
+    "        # show the number of games played\n",
+    "        if ((i+1) % 100 == 0):\n",
+    "            print('Game', i+1)\n",
+    "\n",
+    "        # action selection\n",
+    "        if np.random.rand() < test*epsilon(step):\n",
+    "            a = np.random.randint(2)\n",
+    "        else:\n",
+    "            a = greedy_action(dqn, x)\n",
+    "        # game reward\n",
+    "        reward = p.act(actions[a])\n",
+    "        # clipped reward\n",
+    "        r = clip_reward(reward)\n",
+    "        # next screen\n",
+    "        screen_y = process_screen(p.getScreenRGB())\n",
+    "        d = p.game_over() # is the resulting state terminal?\n",
+    "        replay_memory.append(screen_x, a, r, screen_y, d)\n",
+    "        # train\n",
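+    "        # sample a random minibatch and take one Q-learning gradient step\n",
+    "        # (runs every frame once more than mini_batch_size transitions have been collected)\n",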
+    "        if step > mini_batch_size:\n",
+    "            X, A, R, Y, D = replay_memory.minibatch(mini_batch_size)\n",
+    "            QY = dqn_target.predict(Y)\n",
+    "            QYmax = QY.max(1).reshape((mini_batch_size, 1))\n",
+    "            update = R + gamma * (1-D) * QYmax\n",
+    "            QX = dqn.predict(X)\n",
+    "            QX[np.arange(mini_batch_size), A.ravel()] = update.ravel()\n",
+    "            dqn.train_on_batch(x=X, y=QX)\n",
+    "\n",
+    "        # transfer weights to the target network (via save/load) every w_transfer steps\n",
+    "        if step > 1 and step % w_transfer == 0:\n",
+    "            print('saving')\n",
+    "            dqn.save('dqn_3.h5')\n",
+    "            print('Saving done')\n",
+    "            dqn_target = load_model('dqn_3.h5')\n",
+    "        # scores\n",
+    "        cumulated[i] = cumulated[i] + reward\n",
+    "        cumulated_art[i] = cumulated_art[i] + r\n",
+    "        # keep going\n",
+    "        screen_x = screen_y\n",
+    "        stacked_x.append(screen_x)\n",
+    "        x = np.stack(stacked_x, axis=-1)\n",
+    "\n",
+    "# total time played\n",
+    "stop = timeit.default_timer()\n",
+    "temps = stop - start\n",
+    "\n",
+    "print('time elapsed [s]', temps)\n",
+    "\n",
+    "print('saving')\n",
+    "dqn.save('dqn_3.h5')\n",
+    "print('Saving done')\n",
+    "print('done ^^')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/dqn_3.h5 b/dqn_3.h5
new file mode 100644
index 0000000..2547060
Binary files /dev/null and b/dqn_3.h5 differ
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..333e30e
--- /dev/null
+++ b/run.py
@@ -0,0 +1,32 @@
+# You're not allowed to change this file
+from ple.games.flappybird import FlappyBird
+from ple import PLE
+import numpy as np
+from FlappyAgent import FlappyPolicy
+
+game = FlappyBird()
+p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)
+# Note: if you want to see your agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.
+
+p.init()
+reward = 0.0
+
+nb_games = 5
+cumulated = np.zeros((nb_games))
+
+for i in range(nb_games):
+    p.reset_game()
+
+    while(not p.game_over()):
+        state = game.getGameState()
+        screen = p.getScreenRGB()
+        action = FlappyPolicy(state, screen) ### Your job is to define this function.
+        #print(action)
+        reward = p.act(action)
+        cumulated[i] = cumulated[i] + reward
+
+average_score = np.mean(cumulated)
+max_score = np.max(cumulated)
+
+print('avg:', average_score)
+print('max:', max_score)
\ No newline at end of file