Commit 4b35b90c authored by Jiakai Song
Browse files

rename directory

parent 86eaf68e
......@@ -8,6 +8,6 @@ python run_pdqn.py
### Load trained model
python load.py --algo='hppo' --load_dir='models/_HPPO' [--render --no_sync]
python load.py --algo='hppo' --load_dir='models/HPPO_1v0' [--render --no_sync]
python load.py --algo='pdqn' --load_dir='models/_PDQN' [--render --no_sync]
python load.py --algo='pdqn' --load_dir='models/PDQN_1v0' [--render --no_sync]
......@@ -18,7 +18,7 @@ class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
"""
def __init__(self, sync=True, offense_on_ball=True):
def __init__(self, sync=True, offense_on_ball=False):
self.offense_on_ball = int(offense_on_ball)
super(SoccerAgainstKeeperEnv, self).__init__(sync=sync)
low0 = np.array([-1, -1, 0], dtype=np.float32)
......
......@@ -51,6 +51,7 @@ if __name__ == '__main__':
ep_returns = []
lens = []
max_return = 0
total_steps = 0
for ep in range(args.num_episode):
state = env.reset()
ep_r = 0
......@@ -59,6 +60,7 @@ if __name__ == '__main__':
q.append(state)
stack_state = np.array(q).flatten()
for t in itertools.count():
total_steps += 1
hybrid_action, action, params = agent.choose_action(stack_state)
next_state, reward, done, info = env.step(hybrid_action)
q.append(next_state)
......@@ -76,7 +78,7 @@ if __name__ == '__main__':
if (ep + 1) % args.save_interval == 0:
avg_return = np.mean(ep_returns[-args.save_interval:])
writer.add_scalar('reward/episode', avg_return, ep + 1)
writer.add_scalar('length/episode', np.mean(lens[-args.save_interval:]), ep + 1)
writer.add_scalar('episode_length/episode', np.mean(lens[-args.save_interval:]), ep + 1)
if avg_return > max_return:
agent.save_model(args.save_dir)
max_return = avg_return
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment