Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
submerged_kangaroo
Reinforcement
Commits
db760cd6
Commit
db760cd6
authored
Oct 24, 2020
by
anon
Browse files
Refactoring
parent
f26a3175
Changes
1
Hide whitespace changes
Inline
Side-by-side
valueIterationAgents.py
View file @
db760cd6
...
...
@@ -69,10 +69,7 @@ class ValueIterationAgent(ValueEstimationAgent):
rewards
=
util
.
Counter
()
actions
=
self
.
mdp
.
getPossibleActions
(
state
)
for
action
in
actions
:
transitionStatesAndProbs
=
self
.
mdp
.
getTransitionStatesAndProbs
(
state
,
action
)
for
stateProb
in
transitionStatesAndProbs
:
nextState
,
probability
=
stateProb
rewards
[
action
]
+=
probability
*
(
self
.
mdp
.
getReward
(
state
,
action
,
nextState
)
+
self
.
discount
*
self
.
values
[
nextState
])
rewards
[
action
]
=
self
.
getQValue
(
state
,
action
)
# Copied from util.Counter code
all
=
list
(
rewards
.
items
())
values
=
[
x
[
1
]
for
x
in
all
]
...
...
@@ -104,8 +101,6 @@ class ValueIterationAgent(ValueEstimationAgent):
transitionStatesAndProbs
=
self
.
mdp
.
getTransitionStatesAndProbs
(
state
,
action
)
qValue
=
0
for
nextState
,
probability
in
transitionStatesAndProbs
:
#Stupid, but why does it only produce north q values????
#qValue += probability * self.mdp.getReward(state, action, nextState)
qValue
+=
probability
*
(
self
.
discount
*
self
.
values
[
nextState
]
+
self
.
mdp
.
getReward
(
state
,
action
,
nextState
))
return
qValue
...
...
@@ -165,7 +160,23 @@ class AsynchronousValueIterationAgent(ValueIterationAgent):
ValueIterationAgent
.
__init__
(
self
,
mdp
,
discount
,
iterations
)
def runValueIteration(self):
    """Run cyclic (asynchronous) value iteration for ``self.iterations`` steps.

    Exactly one state is updated per iteration. Iteration ``i`` updates the
    state at index ``i % len(states)`` of ``self.mdp.getStates()``, so the
    sweep wraps back to the first state as soon as the last one has been
    processed. Updates are written to ``self.values`` in place, so later
    updates in the same sweep see the values written by earlier ones.

    Terminal states are assigned a value of 0. A non-terminal state is
    assigned the maximum of ``self.getQValue(state, action)`` over its
    possible actions, or 0 if it has no actions.
    """
    states = self.mdp.getStates()
    if not states:
        # Nothing to update; avoids a modulo-by-zero below.
        return

    state_count = len(states)
    for i in range(self.iterations):
        # Index instead of popping: keeps the MDP's list intact and wraps
        # around to the first state without skipping or repeating a state.
        state = states[i % state_count]

        if self.mdp.isTerminal(state):
            self.values[state] = 0
            continue

        q_values = [
            self.getQValue(state, action)
            for action in self.mdp.getPossibleActions(state)
        ]
        # default=0 covers a non-terminal state that offers no actions.
        self.values[state] = max(q_values, default=0)
class
PrioritizedSweepingValueIterationAgent
(
AsynchronousValueIterationAgent
):
"""
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment