diff --git a/Solvers/Monte_Carlo.py b/Solvers/Monte_Carlo.py
index f3d848a..ab83a43 100644
--- a/Solvers/Monte_Carlo.py
+++ b/Solvers/Monte_Carlo.py
@@ -76,7 +76,7 @@ def make_epsilon_greedy_policy(self):
         Use:
             self.Q: A dictionary that maps from state -> action-values.
                 Each value is a numpy array of length nA
-            self.options.epsilon: Chance the sample a random action. Float betwen 0 and 1.
+            self.options.epsilon: Chance to sample a random action. Float between 0 and 1.
             self.env.action_space.n: Number of actions in the environment.
 
         Returns:
@@ -146,7 +146,7 @@ def train_episode(self):
         Run a single episode of Monte Carlo Control Off-Policy Control using Weighted Importance Sampling.
 
         Use:
-            elf.env: OpenAI environment.
+            self.env: OpenAI environment.
             self.options.steps: steps per episode
             self.behavior_policy(state): returns a soft policy which is the behavior policy
                 (act according to this policy)
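
For context, the epsilon-greedy policy that the first docstring describes is conventionally implemented along these lines. This is a minimal sketch built only from the attributes the docstring names (self.Q, self.options.epsilon, self.env.action_space.n); the actual body of make_epsilon_greedy_policy is not shown in this diff and may differ:

    import numpy as np

    # Sketch: return a function mapping state -> vector of action probabilities.
    # With probability epsilon a uniformly random action is taken; otherwise
    # the greedy action under Q.
    def make_epsilon_greedy_policy(self):
        nA = self.env.action_space.n

        def policy_fn(state):
            # Spread epsilon uniformly over all nA actions ...
            probs = np.ones(nA, dtype=float) * self.options.epsilon / nA
            # ... and give the greedy action the remaining 1 - epsilon mass.
            best_action = np.argmax(self.Q[state])
            probs[best_action] += 1.0 - self.options.epsilon
            return probs

        return policy_fn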
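
Likewise, the train_episode docstring points at the standard off-policy Monte Carlo control loop with weighted importance sampling (Sutton & Barto, Sec. 5.7). A sketch of that loop follows; the method body is not part of this diff, and self.C (a cumulative-weight table), self.options.gamma, and the classic gym reset/step API are all assumptions made for illustration:

    import numpy as np

    def train_episode(self):
        # Generate one episode by acting according to the behavior policy.
        episode = []
        state = self.env.reset()  # assumes the classic gym reset() -> state API
        for _ in range(self.options.steps):
            probs = self.behavior_policy(state)
            action = np.random.choice(len(probs), p=probs)
            next_state, reward, done, _ = self.env.step(action)  # classic gym API
            episode.append((state, action, reward))
            if done:
                break
            state = next_state

        # Walk the episode backwards, applying the weighted IS update:
        #   C(s,a) += W;  Q(s,a) += W / C(s,a) * (G - Q(s,a))
        G, W = 0.0, 1.0
        for state, action, reward in reversed(episode):
            G = self.options.gamma * G + reward  # hypothetical discount option
            self.C[state][action] += W           # hypothetical cumulative-weight table
            self.Q[state][action] += (W / self.C[state][action]) * (G - self.Q[state][action])
            # The target policy is greedy w.r.t. Q, so the importance weight
            # becomes zero (and the backward pass stops) as soon as the
            # behavior action deviates from the greedy action.
            if action != np.argmax(self.Q[state]):
                break
            W *= 1.0 / self.behavior_policy(state)[action]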