Commit 7d31ca54 authored by anon
Browse files

Implement Q-Learning Agent

parent 2df80308
......@@ -41,6 +41,7 @@ class QLearningAgent(ReinforcementAgent):
def __init__(self, **args):
    """Set up the base reinforcement agent and create the Q-value table.

    All keyword arguments are forwarded unchanged to
    ``ReinforcementAgent.__init__``.
    """
    ReinforcementAgent.__init__(self, **args)
    # Q-value table; other methods of this class index it by (state, action).
    self.qValues = util.Counter()
......@@ -51,7 +52,8 @@ class QLearningAgent(ReinforcementAgent):
or the Q node value otherwise
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
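# A (state, action) tuple is a valid dict key as long as both parts are hashable.
# NOTE(review): presumably util.Counter returns 0 for unseen keys - confirm in util.py.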
return self.qValues[(state, action)]
def computeValueFromQValues(self, state):
......@@ -62,7 +64,11 @@ class QLearningAgent(ReinforcementAgent):
terminal state, you should return a value of 0.0.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
legalActions = self.getLegalActions(state)
if len(legalActions) == 0:
return 0.0
qValues = [self.getQValue(state, action) for action in legalActions]
return max(qValues)
def computeActionFromQValues(self, state):
"""
......@@ -71,7 +77,23 @@ class QLearningAgent(ReinforcementAgent):
you should return None.
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
legalActions = list(self.getLegalActions(state))
if len(legalActions) == 0:
return None
# NOTE: shuffling would randomize tie-breaking among equal-valued actions,
# but it is disabled below, so ties are resolved by list order instead.
# random.shuffle(legalActions)
qValues = [self.getQValue(state, action) for action in legalActions]
maxQ = qValues[0]
maxAction = 0
# Works even when the first element is the max: index 0 is the initial
# candidate, and `>=` means ties go to the last maximal action in the list.
for i, val in enumerate(qValues):
if val >= maxQ:
maxAction = i
maxQ = val
return legalActions[maxAction]
def getAction(self, state):
"""
......@@ -84,13 +106,15 @@ class QLearningAgent(ReinforcementAgent):
HINT: You might want to use util.flipCoin(prob)
HINT: To pick randomly from a list, use random.choice(list)
"""
# Pick Action
legalActions = self.getLegalActions(state)
action = None
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
legalActions = self.getLegalActions(state)
if len(legalActions) == 0:
return None
if util.flipCoin(self.epsilon):
return random.choice(legalActions)
else:
return self.computeActionFromQValues(state)
return action
def update(self, state, action, nextState, reward):
"""
......@@ -102,7 +126,9 @@ class QLearningAgent(ReinforcementAgent):
it will be called on your behalf
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
sample = self.alpha * (reward + self.discount * self.computeValueFromQValues(nextState))
self.qValues[(state, action)] = (1- self.alpha) * self.qValues[(state, action)] + sample
def getPolicy(self, state):
    """Return the action that computeActionFromQValues selects for ``state``."""
    chosen = self.computeActionFromQValues(state)
    return chosen
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment