Skip to content

Commit 2870716

Browse files
authored
Merge pull request #32 from acmucsd/valueiteration
Merge valueiteration into main
2 parents e51df0c + 200734c commit 2870716

File tree

1 file changed

+30
-2
lines changed

1 file changed

+30
-2
lines changed

2022/FA22/intro-ai-series/workshop-3-reinforcement-learning/src/valueIterationAgents.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,22 @@ def runValueIteration(self):
6565
value iteration, V_k+1(...) depends on V_k(...)'s.
6666
"""
6767
"*** YOUR CODE HERE ***"
68-
68+
V_curr = util.Counter()
69+
for _ in range(self.iterations):
70+
states = self.mdp.getStates()
71+
for state in states:
72+
if self.mdp.isTerminal(state):
73+
V_curr[state] = float(0)
74+
continue
75+
Q_curr = []
76+
for act in self.mdp.getPossibleActions(state):
77+
qValue = self.computeQValueFromValues(state, act)
78+
Q_curr.append(qValue)
79+
V_curr[state] = max(Q_curr)
80+
self.values = V_curr.copy()
81+
82+
83+
6984
def getValue(self, state):
7085
"""
7186
Return the value of the state (computed in __init__).
@@ -78,7 +93,16 @@ def computeQValueFromValues(self, state, action):
7893
value function stored in self.values.
7994
"""
8095
"*** YOUR CODE HERE ***"
81-
util.raiseNotDefined()
96+
q = 0
97+
if not self.mdp.isTerminal(state):
98+
for n in self.mdp.getTransitionStatesAndProbs(state, action):
99+
nextState = n[0]
100+
prob = n[1]
101+
q += prob*(self.mdp.getReward(state, action, nextState) + self.discount*self.values[nextState])
102+
return q
103+
104+
105+
82106

83107
def computeActionFromValues(self, state):
84108
"""
@@ -90,6 +114,10 @@ def computeActionFromValues(self, state):
90114
terminal state, you should return None.
91115
"""
92116
"*** YOUR CODE HERE ***"
117+
policy = util.Counter();
118+
for action in self.mdp.getPossibleActions(state):
119+
policy[action] = self.getQValue(state, action);
120+
return policy.argMax();
93121
util.raiseNotDefined()
94122

95123
def getPolicy(self, state):

0 commit comments

Comments (0)