Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Jiakai Song
pdqn_hppo
Commits
4b35b90c
Commit
4b35b90c
authored
Sep 01, 2021
by
Jiakai Song
Browse files
rename directory
parent
86eaf68e
Changes
9
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
4b35b90c
...
...
@@ -8,6 +8,6 @@ python run_pdqn.py
### Load trained model
python load.py --algo='hppo' --load_dir='models/_HPPO' [--render --no_sync]
python load.py --algo='hppo' --load_dir='models/HPPO_1v0' [--render --no_sync]
python load.py --algo='pdqn' --load_dir='models/_PDQN' [--render --no_sync]
python load.py --algo='pdqn' --load_dir='models/PDQN_1v0' [--render --no_sync]
envs/soccer_against_keeper.py
View file @
4b35b90c
...
...
@@ -18,7 +18,7 @@ class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
"""
def
__init__
(
self
,
sync
=
True
,
offense_on_ball
=
Tru
e
):
def
__init__
(
self
,
sync
=
True
,
offense_on_ball
=
Fals
e
):
self
.
offense_on_ball
=
int
(
offense_on_ball
)
super
(
SoccerAgainstKeeperEnv
,
self
).
__init__
(
sync
=
sync
)
low0
=
np
.
array
([
-
1
,
-
1
,
0
],
dtype
=
np
.
float32
)
...
...
models/_HPPO/actor.pkl → models/HPPO_1v0/actor.pkl
View file @
4b35b90c
File moved
models/_HPPO/critic.pkl → models/HPPO_1v0/critic.pkl
View file @
4b35b90c
File moved
models/_HPPO/info.pkl → models/HPPO_1v0/info.pkl
View file @
4b35b90c
File moved
models/_PDQN/info.pkl → models/PDQN_1v0/info.pkl
View file @
4b35b90c
File moved
models/_PDQN/p_net.pkl → models/PDQN_1v0/p_net.pkl
View file @
4b35b90c
File moved
models/_PDQN/q_net.pkl → models/PDQN_1v0/q_net.pkl
View file @
4b35b90c
File moved
run_pdqn.py
View file @
4b35b90c
...
...
@@ -51,6 +51,7 @@ if __name__ == '__main__':
ep_returns
=
[]
lens
=
[]
max_return
=
0
total_steps
=
0
for
ep
in
range
(
args
.
num_episode
):
state
=
env
.
reset
()
ep_r
=
0
...
...
@@ -59,6 +60,7 @@ if __name__ == '__main__':
q
.
append
(
state
)
stack_state
=
np
.
array
(
q
).
flatten
()
for
t
in
itertools
.
count
():
total_steps
+=
1
hybrid_action
,
action
,
params
=
agent
.
choose_action
(
stack_state
)
next_state
,
reward
,
done
,
info
=
env
.
step
(
hybrid_action
)
q
.
append
(
next_state
)
...
...
@@ -76,7 +78,7 @@ if __name__ == '__main__':
if
(
ep
+
1
)
%
args
.
save_interval
==
0
:
avg_return
=
np
.
mean
(
ep_returns
[
-
args
.
save_interval
:])
writer
.
add_scalar
(
'reward/episode'
,
avg_return
,
ep
+
1
)
writer
.
add_scalar
(
'length/episode'
,
np
.
mean
(
lens
[
-
args
.
save_interval
:]),
ep
+
1
)
writer
.
add_scalar
(
'
episode_
length/episode'
,
np
.
mean
(
lens
[
-
args
.
save_interval
:]),
ep
+
1
)
if
avg_return
>
max_return
:
agent
.
save_model
(
args
.
save_dir
)
max_return
=
avg_return
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment