# -*- coding: utf-8 -*-
"""
Created on Thu Jun 3 13:06:20 2021

@author: Ayush
"""

import gym

env = gym.make("SpaceInvaders-v0")

episodes = 10

# Baseline: play `episodes` episodes with a purely random policy so the
# trained agent's scores further down can be compared against chance.
# range(1, episodes + 1) fixes an off-by-one in the original
# (range(1, episodes) only ran 9 of the intended 10 episodes).
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        # env.step returns (observation, reward, done, info) for a random action.
        state, reward, done, info = env.step(env.action_space.sample())
        score += reward
    print('Episode: {}\n Score: {}'.format(episode, score))
env.close()
# Import Neural Network Packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam


def build_model(height, width, channels, actions):
    """Return an (uncompiled) convolutional Q-network.

    The leading 3 in the input shape matches the agent's frame stack
    (the replay memory uses window_length=3), so each sample is a stack
    of three (height, width, channels) observation frames.

    Args:
        height, width, channels: dimensions of a single observation frame.
        actions: size of the discrete action space; the linear output
            layer emits one Q-value estimate per action.
    """
    layers = [
        Conv2D(32, (8, 8), strides=(4, 4), activation='relu',
               input_shape=(3, height, width, channels)),
        Conv2D(64, (4, 4), strides=(2, 2), activation='relu'),
        Conv2D(64, (4, 4), strides=(2, 2), activation='relu'),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(64, activation='relu'),
        # Linear head: raw Q-values, no squashing.
        Dense(actions, activation='linear'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model

# Derive the network dimensions from the environment itself so the model
# always matches the observation space and action space of this env.
height, width, channels = env.observation_space.shape
actions = env.action_space.n

model = build_model(height, width, channels, actions)

# Importing keras-rl2 reinforcement learning functions
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy


def build_agent(model, actions):
    """Wrap *model* in a dueling DQN agent and return it (uncompiled).

    Exploration follows an epsilon-greedy policy whose epsilon is
    annealed linearly from 1.0 down to 0.1 over the first 10000 steps
    (0.2 is used during testing). Transitions are replayed from a
    2000-entry memory with a 3-frame observation window.
    """
    annealed_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(), attr='eps',
        value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    replay_memory = SequentialMemory(limit=2000, window_length=3)
    return DQNAgent(model=model, memory=replay_memory, policy=annealed_policy,
                    enable_dueling_network=True, dueling_type='avg',
                    nb_actions=actions, nb_steps_warmup=1000)

dqn = build_agent(model, actions)

# `lr` is a deprecated keyword for TF2 Keras optimizers; `learning_rate`
# is the supported name.
dqn.compile(Adam(learning_rate=0.001))

# Train for 40000 environment steps (the first 1000 are warm-up, per the
# agent's nb_steps_warmup setting).
dqn.fit(env, nb_steps=40000, visualize=True, verbose=1)

import numpy as np

# Evaluate the trained agent and report the mean episode reward.
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

# Persist the learned weights; the immediate reload is a sanity check
# that the checkpoint round-trips. NOTE(review): save_weights assumes
# the 'models/' directory already exists — confirm or create it first.
dqn.save_weights('models/dqn.h5f')
dqn.load_weights('models/dqn.h5f')