Merge pull request #32 from acmucsd/valueiteration

jonzamora · web-flow · commit 2870716a4f80 · 2022-11-06T16:31:47.000-08:00
Merge valueiteration into main
diff --git a/2022/FA22/intro-ai-series/workshop-3-reinforcement-learning/src/valueIterationAgents.py b/2022/FA22/intro-ai-series/workshop-3-reinforcement-learning/src/valueIterationAgents.py
@@ -65,7 +65,22 @@ def runValueIteration(self):
           value iteration, V_k+1(...) depends on V_k(...)'s.
         """
         "*** YOUR CODE HERE ***"
-
+        V_curr = util.Counter()
+        for _ in range(self.iterations):
+            states = self.mdp.getStates()
+            for state in states:
+                if self.mdp.isTerminal(state):
+                    V_curr[state] = float(0)
+                    continue
+                Q_curr = []
+                for act in self.mdp.getPossibleActions(state):
+                    qValue = self.computeQValueFromValues(state, act)
+                    Q_curr.append(qValue)
+                V_curr[state] = max(Q_curr)
+            self.values = V_curr.copy()
+                
+                
+                    
     def getValue(self, state):
         """
           Return the value of the state (computed in __init__).
@@ -78,7 +93,16 @@ def computeQValueFromValues(self, state, action):
           value function stored in self.values.
         """
         "*** YOUR CODE HERE ***"
-        util.raiseNotDefined()
+        q = 0
+        if not self.mdp.isTerminal(state):
+            for n in self.mdp.getTransitionStatesAndProbs(state, action):
+                nextState = n[0]
+                prob = n[1]
+                q += prob*(self.mdp.getReward(state, action, nextState) + self.discount*self.values[nextState])
+        return q
+
+
+
 
     def computeActionFromValues(self, state):
         """
@@ -90,6 +114,10 @@ def computeActionFromValues(self, state):
           terminal state, you should return None.
         """
         "*** YOUR CODE HERE ***"
+        policy = util.Counter();
+        for action in self.mdp.getPossibleActions(state):
+            policy[action] = self.getQValue(state, action);
+        return policy.argMax();
         util.raiseNotDefined()
 
     def getPolicy(self, state):