Pi-Star-Lab · noamgariani11 · Oct 1, 2024 · Oct 1, 2024
diff --git a/Solvers/Monte_Carlo.py b/Solvers/Monte_Carlo.py
@@ -76,7 +76,7 @@ def make_epsilon_greedy_policy(self):
         Use:
             self.Q: A dictionary that maps from state -> action-values.
                 Each value is a numpy array of length nA
-            self.options.epsilon: Chance the sample a random action. Float betwen 0 and 1.
+            self.options.epsilon: Chance to sample a random action. Float between 0 and 1.
             self.env.action_space.n: Number of actions in the environment.
 
         Returns:
@@ -146,7 +146,7 @@ def train_episode(self):
         Run a single episode of Monte Carlo Control Off-Policy Control using Weighted Importance Sampling.
 
         Use:
-            elf.env: OpenAI environment.
+            self.env: OpenAI environment.
             self.options.steps: steps per episode
             self.behavior_policy(state): returns a soft policy which is the
                 behavior policy (act according to this policy)