Commit 795c39e6 authored by Jiakai Song

commit

parent 40e90f39
.idea/workspace.xml
@@ -2,10 +2,19 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="0cf971cc-497f-4c74-9aa1-f78fca398209" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/runs/hppo_1v1/models/actor.pkl" afterDir="false" />
<change afterPath="$PROJECT_DIR$/runs/hppo_1v1/models/critic.pkl" afterDir="false" />
<change afterPath="$PROJECT_DIR$/runs/pdqn_1v0/models/p_net.pkl" afterDir="false" />
<change afterPath="$PROJECT_DIR$/runs/pdqn_1v0/models/q_net.pkl" afterDir="false" />
<change afterPath="$PROJECT_DIR$/runs/pdqn_1v1/models/p_net.pkl" afterDir="false" />
<change afterPath="$PROJECT_DIR$/runs/pdqn_1v1/models/q_net.pkl" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/algorithms/common.py" beforeDir="false" afterPath="$PROJECT_DIR$/algorithms/agent.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/algorithms/hppo.py" beforeDir="false" afterPath="$PROJECT_DIR$/algorithms/hppo.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/algorithms/pdqn.py" beforeDir="false" afterPath="$PROJECT_DIR$/algorithms/pdqn.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/load.py" beforeDir="false" afterPath="$PROJECT_DIR$/load.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/run_hppo.py" beforeDir="false" afterPath="$PROJECT_DIR$/run_hppo.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/run_pdqn.py" beforeDir="false" afterPath="$PROJECT_DIR$/run_pdqn.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -23,9 +32,16 @@
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/runs/pdqn_1v1/models" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$/runs/pdqn_1v1/models" />
<recent name="$PROJECT_DIR$/runs/hppo_1v1/models" />
<recent name="$PROJECT_DIR$/runs/pdqn_1v0/models" />
</key>
</component>
<component name="RunManager">
<configuration name="run_hppo" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="pdqn_hppo" />
@@ -68,6 +84,10 @@
<servers />
</component>
<component name="WindowStateProjectService">
<state x="587" y="344" key="#com.intellij.fileTypes.FileTypeChooser" timestamp="1631416465242">
<screen x="65" y="24" width="1855" height="1056" />
</state>
<state x="587" y="344" key="#com.intellij.fileTypes.FileTypeChooser/65.24.1855.1056@65.24.1855.1056" timestamp="1631416465241" />
<state x="1180" y="271" width="424" height="482" key="FileChooserDialogImpl" timestamp="1630564222384">
<screen x="65" y="24" width="1855" height="1056" />
</state>
README.md
@@ -31,10 +31,10 @@ python run_pdqn.py [--defense_npc]
## Load trained model
python load.py --algo=hppo --load_dir=models/HPPO_1v0 [--render --no_sync]
python load.py --load_dir=runs/hppo_1v0 [--render --no_sync]
python load.py --algo=pdqn --load_dir=models/PDQN_1v0 [--render --no_sync]
python load.py --load_dir=runs/pdqn_1v0 [--render --no_sync]
python load.py --algo=hppo --defense_npc --load_dir=models/HPPO_1v1 [--render --no_sync]
python load.py --load_dir=runs/hppo_1v1 [--render --no_sync]
python load.py --algo=pdqn --defense_npc --load_dir=models/PDQN_1v1 [--render --no_sync]
python load.py --load_dir=runs/pdqn_1v1 [--render --no_sync]
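
The simplified commands above work because load.py (diffed further down in this commit) now reads the run's config.yaml and merges it into its own arguments, so --algo and --defense_npc no longer have to be passed. A minimal sketch of that lookup, not part of the commit itself; the path runs/hppo_1v1 is only an example run directory written by run_hppo.py:

import os
import yaml

load_dir = 'runs/hppo_1v1'                         # example run directory
with open(os.path.join(load_dir, 'config.yaml')) as f:
    config = yaml.load(f, Loader=yaml.Loader)      # same call load.py uses
print(config['algo'], config['defense_npc'])       # e.g. hppo True
models_dir = os.path.join(load_dir, 'models')      # actor.pkl / critic.pkl are loaded from here
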
algorithms/hppo.py
@@ -115,18 +115,8 @@ class HPPO(Agent):
super(HPPO, self).__init__(env)
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# self.device = torch.device('cpu')
load_dir = None
if not load:
self.frame_stack = args.frame_stack
self.hidden_size = args.hidden_size
else:
if args.load_dir is not None:
load_dir = args.load_dir
else:
load_dir = './models/HPPO/'
info = torch.load(os.path.join(load_dir, 'info.pkl'))
self.frame_stack = info['frame_stack']
self.hidden_size = info['hidden_size']
self.frame_stack = args.frame_stack
self.hidden_size = args.hidden_size
self.input_size = self.state_size * self.frame_stack
self.actor = Actor(self.input_size, self.n_discrete, self.params_size, self.hidden_size).to(self.device)
self.critic = Critic(self.input_size, self.hidden_size).to(self.device)
@@ -144,8 +134,6 @@ class HPPO(Agent):
self.adv_norm = adv_norm
self.coef_entropy = args.coef_entropy
self.grad_clip = grad_clip
else:
self.load_model(load_dir)
def choose_action(self, state, avail_actions, explore=True):
with torch.no_grad():
@@ -258,8 +246,6 @@ class HPPO(Agent):
save_dir = './models/HPPO/'
if not os.path.exists(save_dir):
os.makedirs(save_dir)
info = {'hidden_size': self.hidden_size, 'frame_stack': self.frame_stack}
torch.save(info, os.path.join(save_dir, 'info.pkl'))
torch.save(self.actor.state_dict(), os.path.join(save_dir, 'actor.pkl'))
torch.save(self.critic.state_dict(), os.path.join(save_dir, 'critic.pkl'))
algorithms/pdqn.py
@@ -144,21 +144,10 @@ class PDQN(Agent):
super(PDQN, self).__init__(env)
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# self.device = torch.device('cpu')
load_dir = None
if not load:
self.frame_stack = args.frame_stack
self.hidden_size = args.hidden_size
self.mp = args.mp
self.squash = args.squash
else:
if args.load_dir is not None:
load_dir = args.load_dir
info = torch.load(os.path.join(load_dir, 'info.pkl'))
self.frame_stack = info['frame_stack']
self.hidden_size = info['hidden_size']
self.mp = info['mp']
self.squash = info['squash']
self.frame_stack = args.frame_stack
self.hidden_size = args.hidden_size
self.mp = args.mp
self.squash = args.squash
self.input_size = self.state_size * self.frame_stack
self.p_net = ParamsNet(self.input_size, self.params_size, self.squash, self.hidden_size).to(self.device)
self.target_p_net = ParamsNet(self.input_size, self.params_size, self.squash, self.hidden_size).to(self.device)
@@ -191,7 +180,6 @@
self.ou_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros([self.params_size]), sigma=0.005)
else:
self.add_ou_noise = False
self.load_model(load_dir)
# def get_ave_max_q(self, state):
# with torch.no_grad():
load.py
@@ -6,6 +6,9 @@ from algorithms.pdqn import PDQN
from algorithms.hppo import HPPO
from envs.soccer_score_goal import SoccerScoreGoalEnv
from envs.soccer_against_keeper import SoccerAgainstKeeperEnv
import yaml
import os
if __name__ == '__main__':
parser = argparse.ArgumentParser()
@@ -13,10 +16,11 @@ if __name__ == '__main__':
parser.add_argument('--load_dir', type=str, default=None)
parser.add_argument('--render', default=False, action='store_true')
parser.add_argument('--no_sync', default=False, action='store_true')
parser.add_argument('--algo', type=str, default='hppo', choices=['hppo', 'pdqn'])
parser.add_argument('--defense_npc', default=False, action='store_true')
args = parser.parse_args()
with open(os.path.join(args.load_dir, 'config.yaml'), 'r') as f:
args.__dict__.update(yaml.load(f, Loader=yaml.Loader))
sync = not (args.render and args.no_sync)
if args.defense_npc:
env = SoccerAgainstKeeperEnv(sync)
@@ -29,8 +33,9 @@ if __name__ == '__main__':
else:
agent = PDQN(env, args, load=True)
agent.load_model(os.path.join(args.load_dir, 'models'))
state_size = env.observation_space.shape[0]
frame_stack = agent.frame_stack
frame_stack = args.frame_stack
q = deque(maxlen=frame_stack)
test_r = 0
@@ -41,7 +46,7 @@
for t in itertools.count():
stack_state = np.array(q).flatten()
avail_actions = env.get_avail_actions()
hybrid_action, action, params = agent.choose_action(stack_state, avail_actions, explore=False)[:3]
hybrid_action = agent.choose_action(stack_state, avail_actions, explore=False)[0]
next_state, reward, done, info = env.step(hybrid_action)
q.append(next_state)
test_r += reward
run_hppo.py
@@ -46,9 +46,10 @@ if __name__ == '__main__':
parser.add_argument('--defense_npc', default=False, action='store_true')
args = parser.parse_args()
writer = SummaryWriter()
writer = SummaryWriter(logdir=args.save_dir)
logdir = writer.logdir
with open(os.path.join(logdir, 'HPPO_config.yaml'), 'w') as f:
args.algo = 'hppo'
with open(os.path.join(logdir, 'config.yaml'), 'w') as f:
f.write(yaml.dump(args.__dict__, ))
num_iteration = args.num_iteration
@@ -123,6 +124,6 @@ if __name__ == '__main__':
writer.add_scalar('entropy1/iteration', entropy1, iteration)
writer.add_scalar('entropy2/iteration', entropy2, iteration)
if (iteration + 1) % args.save_interval == 0:
agent.save_model(args.save_dir)
agent.save_model(os.path.join(args.save_dir, 'models'))
writer.close()
run_pdqn.py
@@ -40,9 +40,10 @@ if __name__ == '__main__':
parser.add_argument('--defense_npc', default=False, action='store_true')
args = parser.parse_args()
writer = SummaryWriter()
writer = SummaryWriter(args.save_dir)
logdir = writer.logdir
with open(os.path.join(logdir, 'PDQN_config.yaml'), 'w') as f:
args.algo='pdqn'
with open(os.path.join(logdir, 'config.yaml'), 'w') as f:
yaml.dump(args.__dict__, f)
if args.defense_npc:
env = SoccerAgainstKeeperEnv()
@@ -85,7 +86,7 @@ if __name__ == '__main__':
writer.add_scalar('reward/episode', avg_return, ep + 1)
writer.add_scalar('episode_length/episode', np.mean(lens[-args.save_interval:]), ep + 1)
if avg_return > max_return:
agent.save_model(args.save_dir)
agent.save_model(os.path.join(args.save_dir, 'models'))
max_return = avg_return
writer.close()
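
For orientation only (not part of the commit): after these changes a training run started with --save_dir=runs/pdqn_1v1 should leave behind exactly the pieces load.py relies on. The file names below come from the changelist at the top of this commit, and the path is just an example:

import os

run_dir = 'runs/pdqn_1v1'                          # example save_dir
expected = [
    os.path.join(run_dir, 'config.yaml'),          # dumped by run_pdqn.py next to the TensorBoard logs
    os.path.join(run_dir, 'models', 'p_net.pkl'),  # written by agent.save_model(...)
    os.path.join(run_dir, 'models', 'q_net.pkl'),
]
for path in expected:
    print(path, 'ok' if os.path.exists(path) else 'missing')
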
runs/hppo_1v0/config.yaml
algo: hppo
batch_size: 2048
coef_entropy: [0.001, 0.001]
defense_npc: false
epochs: 4
epsilon: 0.2
frame_stack: 1
gae_lam: 0.97
gamma: 0.99
hidden_size: [256, 256, 256, 256]
lr_a: 0.0001
lr_c: 0.0002
n_mini_batch: 4
num_iteration: 10000
save_dir: runs/hppo_1v0
save_interval: 10
runs/hppo_1v1/config.yaml
algo: hppo
batch_size: 2048
coef_entropy: [0.001, 0.001]
defense_npc: true
epochs: 4
epsilon: 0.2
frame_stack: 1
gae_lam: 0.97
gamma: 0.99
hidden_size: [256, 256, 256, 256]
lr_a: 0.0001
lr_c: 0.0002
n_mini_batch: 4
num_iteration: 10000
save_dir: runs/hppo_1v1
save_interval: 10
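
These values take over the role of the removed info.pkl: load.py merges them into args, and the agent constructors rebuild the networks from them. A toy round trip, not from the commit, using placeholder sizes rather than the real environment dimensions:

import yaml

cfg = yaml.safe_load("frame_stack: 1\nhidden_size: [256, 256, 256, 256]")
state_size = 59                                  # placeholder; really taken from env.observation_space
input_size = state_size * cfg['frame_stack']     # same formula as in HPPO/PDQN __init__
print(input_size, cfg['hidden_size'])            # 59 [256, 256, 256, 256]
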