
Update hyper params and set seeds #3384


Merged
merged 7 commits on Jun 16, 2025
Changes from 2 commits
19 changes: 15 additions & 4 deletions intermediate_source/reinforcement_q_learning.py
@@ -91,6 +91,16 @@
"cpu"
)

# set the seeds for reproducibility
Contributor:

Just FYI, we're already doing this in the CI, so I'm not sure it's helpful to do something like that for all users...
Maybe add a paragraph saying to uncomment these if you want fixed output all the time.

Contributor:

I think it's good practice to have this be part of the script, and I'd keep it here.
It helps when you run it locally; RL is usually very seed-dependent.

seed = 42
random.seed(seed)                # Python's built-in RNG
torch.manual_seed(seed)          # PyTorch RNG
env.reset(seed=seed)             # Gymnasium environment RNG
env.action_space.seed(seed)
env.observation_space.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)  # current CUDA device's RNG
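
For fully repeatable runs, stricter settings can be layered on top of these seeds. A minimal sketch, not part of this PR; the NumPy seed and the deterministic-algorithms toggle are assumptions beyond what the tutorial imports:

import numpy as np

np.random.seed(seed)  # some Gym/Gymnasium helpers draw from NumPy's global RNG
# Prefer deterministic kernels where they exist; warn instead of raising
# when an op has no deterministic implementation.
torch.use_deterministic_algorithms(True, warn_only=True)
torch.backends.cudnn.benchmark = False  # cuDNN autotuning can vary across runs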


######################################################################
# Replay Memory
@@ -253,13 +263,14 @@ def forward(self, x):
# EPS_DECAY controls the rate of exponential decay of epsilon, higher means a slower decay
# TAU is the update rate of the target network
# LR is the learning rate of the ``AdamW`` optimizer

BATCH_SIZE = 128
GAMMA = 0.99
- EPS_START = 0.9
- EPS_END = 0.05
- EPS_DECAY = 1000
+ EPS_START = 1
+ EPS_END = 0.01
+ EPS_DECAY = 2500
TAU = 0.005
- LR = 1e-4
+ LR = 5e-4
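
To see how these constants are consumed, here is a short sketch mirroring the selection and update rules that appear later in this tutorial (``steps_done``, ``policy_net``, and ``target_net`` are defined in those later cells):

import math

# Epsilon anneals exponentially from EPS_START toward EPS_END;
# a larger EPS_DECAY gives a slower decay, i.e. more exploration for longer.
def epsilon_at(steps_done):
    return EPS_END + (EPS_START - EPS_END) * math.exp(-steps_done / EPS_DECAY)

# Soft (Polyak) update of the target network with rate TAU:
# theta_target <- TAU * theta_policy + (1 - TAU) * theta_target
target_state = target_net.state_dict()
policy_state = policy_net.state_dict()
for key in policy_state:
    target_state[key] = policy_state[key] * TAU + target_state[key] * (1 - TAU)
target_net.load_state_dict(target_state)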

# Get number of actions from gym action space
n_actions = env.action_space.n