Commit 70e6164f authored by anon
Browse files

Refactor the QLearning update equations

parent 6dead649
......@@ -126,8 +126,8 @@ class QLearningAgent(ReinforcementAgent):
it will be called on your behalf
"""
"*** YOUR CODE HERE ***"
sample = self.alpha * (reward + self.discount * self.computeValueFromQValues(nextState))
self.qValues[(state, action)] = (1- self.alpha) * self.getQValue(state, action) + sample
sample = reward + self.discount * self.computeValueFromQValues(nextState)
self.qValues[(state, action)] = (1- self.alpha) * self.getQValue(state, action) + self.alpha * sample
def getPolicy(self, state):
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment