Commit d5c0daea authored by Jiakai Song
Browse files

commit

parent eff3b01f
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pdqn_hppo.iml" filepath="$PROJECT_DIR$/.idea/pdqn_hppo.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="0cf971cc-497f-4c74-9aa1-f78fca398209" name="Default Changelist" comment="">
<change beforePath="$PROJECT_DIR$/envs/soccer_against_keeper.py" beforeDir="false" afterPath="$PROJECT_DIR$/envs/soccer_against_keeper.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/envs/soccer_empty_goal.py" beforeDir="false" afterPath="$PROJECT_DIR$/envs/soccer_empty_goal.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/envs/soccer_env.py" beforeDir="false" afterPath="$PROJECT_DIR$/envs/soccer_env.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/envs/soccer_score_goal.py" beforeDir="false" afterPath="$PROJECT_DIR$/envs/soccer_score_goal.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="ProjectId" id="1xZV1wYaBeUu8sFifqAunj5kqWf" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showExcludedFiles" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="0cf971cc-497f-4c74-9aa1-f78fca398209" name="Default Changelist" comment="" />
<created>1630562385716</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1630562385716</updated>
</task>
<servers />
</component>
<component name="WindowStateProjectService">
<state x="485" y="174" key="SettingsEditor" timestamp="1630562526490">
<screen x="65" y="24" width="1855" height="1056" />
</state>
<state x="485" y="174" key="SettingsEditor/65.24.1855.1056@65.24.1855.1056" timestamp="1630562526490" />
</component>
</project>
\ No newline at end of file
......@@ -18,45 +18,9 @@ class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
"""
def __init__(self, sync=True, offense_on_ball=False):
def __init__(self, sync=True, action_level='mid', offense_on_ball=False):
self.offense_on_ball = int(offense_on_ball)
super(SoccerAgainstKeeperEnv, self).__init__(sync=sync)
low0 = np.array([-1, -1, 0], dtype=np.float32)
high0 = np.array([1, 1, 3], dtype=np.float32)
low1 = np.array([-1, -1], dtype=np.float32)
high1 = np.array([1, 1], dtype=np.float32)
low2 = np.array([-1, -1], dtype=np.float32)
high2 = np.array([1, 1], dtype=np.float32)
low3 = -1.0
high3 = 1.0
self.action_space = spaces.Tuple((spaces.Discrete(4),
spaces.Box(low=low0, high=high0, dtype=np.float32),
spaces.Box(low=low1, high=high1, dtype=np.float32),
spaces.Box(low=low2, high=high2, dtype=np.float32),
spaces.Box(low=low3, high=high3, dtype=np.float32, shape=(0,)),
))
def get_avail_actions(self):
avail_actions = np.ones([4])
state = self.env.getState()
if int(state[12]) != 1:
avail_actions[[0, 3]] = 0 # KICK_TO, SHOOT unavailable
return avail_actions
def _take_action(self, action):
""" Converts the action space into an HFO action. """
action_type = ACTION_LOOKUP[action[0]]
if action_type == hfo_py.KICK_TO:
self.env.act(action_type, action[1], action[2], action[3])
elif action_type == hfo_py.MOVE_TO:
self.env.act(action_type, action[4], action[5])
elif action_type == hfo_py.DRIBBLE_TO:
self.env.act(action_type, action[6], action[7])
elif action_type == hfo_py.SHOOT:
self.env.act(action_type)
else:
print('Unrecognized action %d' % action_type)
self.env.act(hfo_py.NOOP)
super(SoccerAgainstKeeperEnv, self).__init__(sync, action_level)
def _configure_environment(self):
super(SoccerAgainstKeeperEnv, self)._start_hfo_server(defense_npcs=1,
......@@ -102,20 +66,6 @@ class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
# print(self.env.getUnum())
reward = 0
if not self.first_step:
'''# Reward the agent for moving towards the ball
reward += ball_prox_delta
if kickable_delta > 0 and not self.got_kickable_reward:
reward += 1.
self.got_kickable_reward = True
# Reward the agent for kicking towards the goal
reward += 0.6 * -ball_dist_goal_delta
# Reward the agent for scoring
if self.status == hfo_py.GOAL:
reward += 5.0'''
'''reward = self.__move_to_ball_reward(kickable_delta, ball_prox_delta) + \
3. * self.__kick_to_goal_reward(ball_dist_goal_delta) + \
self.__EOT_reward();'''
mtb = self._move_to_ball_reward(kickable_delta, ball_prox_delta)
ktg = 3. * self._kick_to_goal_reward(ball_dist_goal_delta)
eot = self._EOT_reward()
......@@ -127,12 +77,3 @@ class SoccerAgainstKeeperEnv(SoccerScoreGoalEnv):
self.first_step = False
# print("r =",reward)
return reward
ACTION_LOOKUP = {
0 : hfo_py.KICK_TO,
1 : hfo_py.MOVE_TO,
2 : hfo_py.DRIBBLE_TO,
3 : hfo_py.SHOOT,
}
......@@ -18,8 +18,8 @@ class SoccerEmptyGoalEnv(SoccerEnv):
the ball, kicks the ball towards the goal, and scores a goal.
"""
def __init__(self, sync=True):
super(SoccerEmptyGoalEnv, self).__init__(sync=sync)
def __init__(self, sync=True, action_level='low'):
super(SoccerEmptyGoalEnv, self).__init__(sync, action_level)
self.old_ball_prox = 0
self.old_kickable = 0
self.old_ball_dist_goal = 0
......
......@@ -34,7 +34,7 @@ def find_free_port():
class SoccerEnv(gym.Env, utils.EzPickle):
metadata = {'render.modes': ['human']}
def __init__(self, sync=True):
def __init__(self, sync=True, action_level='low'):
self.sync = sync
self.viewer = None
self.server_process = None
......@@ -48,17 +48,35 @@ class SoccerEnv(gym.Env, utils.EzPickle):
self.observation_space = spaces.Box(low=-1, high=1,
shape=((self.env.getStateSize(),)), dtype=np.float32)
# Action space omits the Tackle/Catch actions, which are useful on defense
low0 = np.array([0, -180], dtype=np.float32)
high0 = np.array([100, 180], dtype=np.float32)
low1 = np.array([-180], dtype=np.float32)
high1 = np.array([180], dtype=np.float32)
low2 = np.array([0, -180], dtype=np.float32)
high2 = np.array([100, 180], dtype=np.float32)
self.action_space = spaces.Tuple((spaces.Discrete(3),
spaces.Box(low=low0, high=high0, dtype=np.float32),
spaces.Box(low=low1, high=high1, dtype=np.float32),
spaces.Box(low=low2, high=high2, dtype=np.float32)))
assert action_level in ['low', 'mid']
self._action_level = action_level
if action_level == 'low':
low0 = np.array([0, -180], dtype=np.float32)
high0 = np.array([100, 180], dtype=np.float32)
low1 = np.array([-180], dtype=np.float32)
high1 = np.array([180], dtype=np.float32)
low2 = np.array([0, -180], dtype=np.float32)
high2 = np.array([100, 180], dtype=np.float32)
self.action_space = spaces.Tuple((spaces.Discrete(3),
spaces.Box(low=low0, high=high0, dtype=np.float32),
spaces.Box(low=low1, high=high1, dtype=np.float32),
spaces.Box(low=low2, high=high2, dtype=np.float32)))
else:
low0 = np.array([-1, -1, 0], dtype=np.float32)
high0 = np.array([1, 1, 3], dtype=np.float32)
low1 = np.array([-1, -1], dtype=np.float32)
high1 = np.array([1, 1], dtype=np.float32)
low2 = np.array([-1, -1], dtype=np.float32)
high2 = np.array([1, 1], dtype=np.float32)
low3 = -1.0
high3 = 1.0
self.action_space = spaces.Tuple((spaces.Discrete(4),
spaces.Box(low=low0, high=high0, dtype=np.float32),
spaces.Box(low=low1, high=high1, dtype=np.float32),
spaces.Box(low=low2, high=high2, dtype=np.float32),
spaces.Box(low=low3, high=high3, dtype=np.float32, shape=(0,)),
))
self.status = hfo_py.IN_GAME
self._seed = -1
self.first_episode = True
......@@ -137,7 +155,13 @@ class SoccerEnv(gym.Env, utils.EzPickle):
self.viewer = subprocess.Popen(cmd.split(' '), shell=False)
def get_avail_actions(self):
avail_actions = np.ones([3])
if self._action_level == 'low':
avail_actions = np.ones([3])
else:
avail_actions = np.ones([4])
state = self.env.getState()
if int(state[12]) != 1:
avail_actions[[0, 3]] = 0 # KICK_TO, SHOOT unavailable
return avail_actions
def step(self, action):
......@@ -150,16 +174,30 @@ class SoccerEnv(gym.Env, utils.EzPickle):
def _take_action(self, action):
""" Converts the action space into an HFO action. """
action_type = ACTION_LOOKUP[action[0]]
if action_type == hfo_py.DASH:
self.env.act(action_type, action[1], action[2])
elif action_type == hfo_py.TURN:
self.env.act(action_type, action[3])
elif action_type == hfo_py.KICK:
self.env.act(action_type, action[4], action[5])
if self._action_level == 'low':
action_type = LOW_LEVEL_ACTION_LOOKUP[action[0]]
if action_type == hfo_py.DASH:
self.env.act(action_type, action[1], action[2])
elif action_type == hfo_py.TURN:
self.env.act(action_type, action[3])
elif action_type == hfo_py.KICK:
self.env.act(action_type, action[4], action[5])
else:
print('Unrecognized action %d' % action_type)
self.env.act(hfo_py.NOOP)
else:
print('Unrecognized action %d' % action_type)
self.env.act(hfo_py.NOOP)
action_type = MID_LEVEL_ACTION_LOOKUP[action[0]]
if action_type == hfo_py.KICK_TO:
self.env.act(action_type, action[1], action[2], action[3])
elif action_type == hfo_py.MOVE_TO:
self.env.act(action_type, action[4], action[5])
elif action_type == hfo_py.DRIBBLE_TO:
self.env.act(action_type, action[6], action[7])
elif action_type == hfo_py.SHOOT:
self.env.act(action_type)
else:
print('Unrecognized action %d' % action_type)
self.env.act(hfo_py.NOOP)
def _get_reward(self):
""" Reward is given for scoring a goal. """
......@@ -201,12 +239,17 @@ class ServerDownException(Exception):
pass
ACTION_LOOKUP = {
0 : hfo_py.DASH,
1 : hfo_py.TURN,
2 : hfo_py.KICK,
3 : hfo_py.TACKLE, # Used on defense to slide tackle the ball
4 : hfo_py.CATCH, # Used only by goalie to catch the ball
LOW_LEVEL_ACTION_LOOKUP = {
0: hfo_py.DASH,
1: hfo_py.TURN,
2: hfo_py.KICK,
}
MID_LEVEL_ACTION_LOOKUP = {
0: hfo_py.KICK_TO,
1: hfo_py.MOVE_TO,
2: hfo_py.DRIBBLE_TO,
3: hfo_py.SHOOT
}
STATUS_LOOKUP = {
......
......@@ -23,8 +23,8 @@ class SoccerScoreGoalEnv(SoccerEmptyGoalEnv):
Action Spaces".
"""
def __init__(self, sync=True):
super(SoccerScoreGoalEnv, self).__init__(sync=sync)
def __init__(self, sync=True, action_level='low'):
super(SoccerScoreGoalEnv, self).__init__(sync, action_level)
# dash, turn, kick, tackle
self.unum = self.env.getUnum() # uniform number (identifier) of our lone agent
print("UNUM =", self.unum)
......@@ -69,19 +69,6 @@ class SoccerScoreGoalEnv(SoccerEmptyGoalEnv):
#print(self.env.getUnum())
reward = 0
if not self.first_step:
'''# Reward the agent for moving towards the ball
reward += ball_prox_delta
if kickable_delta > 0 and not self.got_kickable_reward:
reward += 1.
self.got_kickable_reward = True
# Reward the agent for kicking towards the goal
reward += 0.6 * -ball_dist_goal_delta
# Reward the agent for scoring
if self.status == hfo_py.GOAL:
reward += 5.0'''
'''reward = self.__move_to_ball_reward(kickable_delta, ball_prox_delta) + \
3. * self.__kick_to_goal_reward(ball_dist_goal_delta) + \
self.__EOT_reward();'''
mtb = self._move_to_ball_reward(kickable_delta, ball_prox_delta)
ktg = 3. * self._kick_to_goal_reward(ball_dist_goal_delta)
eot = self._EOT_reward()
......@@ -114,4 +101,4 @@ class SoccerScoreGoalEnv(SoccerEmptyGoalEnv):
#elif self.status == hfo_py.CAPTURED_BY_DEFENSE:
# return -1.
return 0.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment