Commit 90f62d80 authored by anon
Browse files

A1 completed

parent ec1554ca
......@@ -21,7 +21,7 @@
def question2():
    """Return the (discount, noise) pair submitted as the answer to question 2.

    Returns:
        tuple: ``(answerDiscount, answerNoise)``.
    """
    answerDiscount = 0.9
    # The earlier 0.2 setting was dead code: it was overwritten by this value
    # before being returned, so only the final noise level is kept.
    answerNoise = 0.014
    return answerDiscount, answerNoise
def question3a():
......
......@@ -66,8 +66,26 @@ class ValueIterationAgent(ValueEstimationAgent):
# Batch value iteration: each sweep builds the next value table from the
# values currently stored in self.values, and only swaps the new table in
# after every state has been updated.
for _ in range(self.iterations):
    values_kPlus1 = util.Counter()
    for state in states:
        if self.mdp.isTerminal(state):
            # Rewards are collected on transitions, so a terminal state
            # contributes no value of its own.
            values_kPlus1[state] = 0
            continue

        # Expected return of each action: sum over its outcomes of
        # probability * (immediate reward + discounted value of the successor).
        # Only actions with at least one transition get an entry.
        qValues = {}
        for action in self.mdp.getPossibleActions(state):
            for nextState, probability in self.mdp.getTransitionStatesAndProbs(state, action):
                qValues[action] = qValues.get(action, 0) + probability * (
                    self.mdp.getReward(state, action, nextState)
                    + self.discount * self.values[nextState]
                )

        # default=0 keeps a non-terminal state with no usable action at value 0
        # instead of raising ValueError from max() on an empty sequence.
        values_kPlus1[state] = max(qValues.values(), default=0)
    self.values = values_kPlus1
def getValue(self, state):
......@@ -83,7 +101,13 @@ class ValueIterationAgent(ValueEstimationAgent):
value function stored in self.values.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
transitionStatesAndProbs = self.mdp.getTransitionStatesAndProbs(state, action)
qValue = 0
for nextState, probability in transitionStatesAndProbs:
#Stupid, but why does it only produce north q values????
#qValue += probability * self.mdp.getReward(state, action, nextState)
qValue += probability * (self.discount * self.values[nextState] + self.mdp.getReward(state, action, nextState))
return qValue
def computeActionFromValues(self, state):
    """Return the action with the highest Q-value in ``state``.

    Q-values come from ``self.computeQValueFromValues(state, action)`` for
    every action reported by ``self.mdp.getPossibleActions(state)``.

    Args:
        state: The state to choose an action for.

    Returns:
        The best action, or ``None`` when ``state`` is terminal or has no
        possible actions. Ties go to the first such action in the order the
        MDP lists them.
    """
    if self.mdp.isTerminal(state):
        return None
    # default=None covers a non-terminal state with an empty action list.
    return max(
        self.mdp.getPossibleActions(state),
        key=lambda action: self.computeQValueFromValues(state, action),
        default=None,
    )
def getPolicy(self, state):
    """Return the policy's action for ``state``.

    Delegates to ``self.computeActionFromValues(state)`` and returns its
    result unchanged.
    """
    return self.computeActionFromValues(state)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment