diff --git a/__pycache__/PPO.cpython-310.pyc b/__pycache__/PPO.cpython-310.pyc
new file mode 100644
index 0000000..00b2a8a
Binary files /dev/null and b/__pycache__/PPO.cpython-310.pyc differ
diff --git a/builddata.py b/builddata.py
new file mode 100644
index 0000000..dc10b8f
--- /dev/null
+++ b/builddata.py
@@ -0,0 +1,80 @@
+
+import pickle
+import random
+import os
+
+# Define the paths to the pickle files
+paths = {'optimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl',
+         'suboptimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl',
+         'optimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl',
+         'suboptimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl',
+         'optimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl',
+         'suboptimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl',
+         'optimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl',
+         'suboptimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl',
+         'optimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl',
+         'suboptimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl',
+         'optimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl',
+         'suboptimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl'}
+
+
+def select_random_elements(input_list, percentage):
+    num_elements = int(len(input_list) * percentage / 100)
+    return random.sample(input_list, num_elements)
+
+def combine(path1, path2, path3):
+    # Load the three trajectory lists from their pickle files
+    with open(path1, 'rb') as f:
+        list1 = pickle.load(f)
+
+    with open(path2, 'rb') as f:
+        list2 = pickle.load(f)
+
+    with open(path3, 'rb') as f:
+        list3 = pickle.load(f)
+    print(len(list1), len(list2), len(list3))
+    # Draw a ~33/33/34 percent split so each source contributes roughly equally
+    selected_list1 = select_random_elements(list1, 33)
+    selected_list2 = select_random_elements(list2, 33)
+    selected_list3 = select_random_elements(list3, 34)
+
+    combined_list = selected_list1 + selected_list2 + selected_list3
+    random.shuffle(combined_list)
+    print(len(combined_list))
+    return combined_list
+
+combined_list1 = combine(paths['optimalfast1038'], paths['optimalfast1538'], paths['optimalfast2038'])
+combined_list2 = combine(paths['suboptimalfast1038'], paths['suboptimalfast1538'], paths['suboptimalfast2038'])
+combined_list3 = combine(paths['optimalslow1038'], paths['optimalslow1538'], paths['optimalslow2038'])
+combined_list4 = combine(paths['suboptimalslow1038'], paths['suboptimalslow1538'], paths['suboptimalslow2038'])
+
+
+directory1 = 'new_data/ContinuousFastRandom-v0'
+directory2 = 'new_data/ContinuousSlowRandom-v0'
+
+if not os.path.exists(directory1):
+    os.makedirs(directory1)
+if not os.path.exists(directory2):
+    os.makedirs(directory2)
+
+path1 = directory1 + 
'/combined_list_fast_optimal.pkl' +path2 = directory1 + '/combined_list_fast_suboptimal.pkl' +path3 = directory2 + '/combined_list_slow_optimal.pkl' +path4 = directory2 + '/combined_list_slow_suboptimal.pkl' + +with open(path1, 'wb') as f: + pickle.dump(combined_list1, f) +with open(path2, 'wb') as f: + pickle.dump(combined_list2, f) +with open(path3, 'wb') as f: + pickle.dump(combined_list3, f) +with open(path4, 'wb') as f: + pickle.dump(combined_list4, f) +print('done') + + + diff --git a/demo_code.py b/demo_code.py new file mode 100644 index 0000000..dca6333 --- /dev/null +++ b/demo_code.py @@ -0,0 +1,216 @@ +import os +import glob +import time +from datetime import datetime + +import torch +import numpy as np +import time +import gym +import pickle +# import roboschool +import driving +import argparse +from PPO import PPO + + +#################################### Testing ################################### +def test(): + print("============================================================================================") + + ################## hyperparameters ################## + parser = argparse.ArgumentParser(description='Test the model') + parser.add_argument('--seed', type=int, default=1001) + parser.add_argument('--goalx', type=int, default=15) + parser.add_argument('--goaly', type=int, default=38) + parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0') + parser.add_argument('--render', action='store_true') + parser.add_argument('--use-sleep', action='store_true') + parser.add_argument('--optimal', action='store_true') + parser.add_argument('--suboptimal', action='store_true') + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_num_samples', type=int, default=1000) + parser.add_argument('--threshold', type=float, default=0) + parser.add_argument('--dontsave', action='store_true') + args = parser.parse_args() + # args.env = "CartPole-v1" + # has_continuous_action_space = False + # max_ep_len = 400 + # action_std = None + + # args.env = "LunarLander-v2" + # has_continuous_action_space = False + # max_ep_len = 300 + # action_std = None + + # args.env = "BipedalWalker-v2" + # has_continuous_action_space = True + # max_ep_len = 1500 # max timesteps in one episode + # action_std = 0.1 # set same std for action distribution which was used while saving + + has_continuous_action_space = True + max_ep_len = 1000 # max timesteps in one episode + action_std = 0.1 # set same std for action distribution which was used while saving + + # delay = True # add delay b/w frames to make video like real time + # render = True # render environment on screen + frame_delay = 0 # if required; add delay b/w frames + + # total_test_episodes = 10 # total num of testing episodes + + K_epochs = 80 # update policy for K epochs + eps_clip = 0.2 # clip parameter for PPO + gamma = 0.99 # discount factor + + lr_actor = 0.0003 # learning rate for actor + lr_critic = 0.001 + # learning rate for critic + run_best_model = False + if args.optimal: + run_best_model = True + # load and run the best saved model + ##################################################### + + env = gym.make(args.env) + env.set_goal(args.goalx, args.goaly) + + # state space dimension + state_dim = env.observation_space.shape[0] + + # action space dimension + if has_continuous_action_space: + action_dim = env.action_space.shape[0] + else: + action_dim = env.action_space.n + + # initialize a PPO agent + ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, 
has_continuous_action_space, action_std) + + # preTrained weights directory + + random_seed = 0 #### set this to load a particular checkpoint trained on random seed + run_num_pretrained = 0 #### set this to load a particular checkpoint num + + directory = "PPO_preTrained" + '/' + args.env + '/' + data_directory = "new_data" + if not os.path.exists(directory): + print("No directory found") + exit() + if not os.path.exists(data_directory): + os.makedirs(data_directory) + data_directory = data_directory + '/' + args.env + '/' + if not os.path.exists(data_directory): + os.makedirs(data_directory) + + if not run_best_model: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + else: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}best.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + + optimal_data_path = data_directory + "optimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + suboptimal_data_path = data_directory + "suboptimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + print("loading network from : " + checkpoint_path) + + ppo_agent.load(checkpoint_path) + + print("--------------------------------------------------------------------------------------------") + + test_running_reward = 0 + dataload = [] + num_optimal = 1 + num_suboptimal = 1 + for ep in range(1, args.num_episodes+1): + ep_reward = 0 + state = env.reset() + state_dict = {'state' : [], 'action': [], 'reward': [], 'optimal': []} + state_dict['state'].append(state) + for t in range(1, max_ep_len+1): + action = ppo_agent.select_action(state) + print(action.dtype) + state, reward, done, _, _= env.step(action) + ep_reward += reward + state_dict['state'].append(state) + state_dict['action'].append(action) + state_dict['reward'].append(reward) + if args.render: + env.render() + # time.sleep(frame_delay) + if args.use_sleep: + time.sleep(0.05) + if done: + break + + # clear buffer + ppo_agent.buffer.clear() + if num_optimal > args.max_num_samples and args.optimal and not args.suboptimal: + break + if num_suboptimal > args.max_num_samples and args.suboptimal and not args.optimal: + break + if num_optimal > args.max_num_samples and num_suboptimal > args.max_num_samples and args.optimal and args.suboptimal: + break + if ep_reward > args.threshold and args.optimal: + state_dict['optimal'] = [True] * len(state_dict['action']) + num_optimal += 1 + dataload.append(state_dict) + if ep_reward <= args.threshold and args.suboptimal: + state_dict['optimal'] = [False] * len(state_dict['action']) + num_suboptimal += 1 + dataload.append(state_dict) + test_running_reward += ep_reward + print('Episode: {} \t\t Reward: {}'.format(ep, round(ep_reward, 2))) + + ep_reward = 0 + + env.close() + + print("============================================================================================") + + avg_test_reward = test_running_reward / args.num_episodes + avg_test_reward = round(avg_test_reward, 2) + print("average test reward : " + str(avg_test_reward)) + if not args.dontsave and args.optimal: + with open(optimal_data_path, 'wb') as file: + pickle.dump(dataload, file) + print("optimal data saved at : " + optimal_data_path + " with " + str(num_optimal) + " samples") + if not args.dontsave and args.suboptimal: + with open(suboptimal_data_path, 'wb') as file: + pickle.dump(dataload, file) + print("suboptimal data saved at : " + 
suboptimal_data_path + " with " + str(num_suboptimal) + " samples") + print("============================================================================================") + print(type(dataload)) + for i in range(args.num_episodes): + accumulator = 0 + accumulator2 = 0 + initial_state = dataload[i]['state'][0] + env.reset(goal = initial_state[7:9]) + print('goal location', initial_state[7:9]) + env.reset_with_obs(initial_state) + # print("initial state ", initial_state) + # print("initial state as per the model ", env.get_obs()) + assert np.allclose(env.get_obs(), initial_state) + if args.render: + env.render() + for step in range(len(dataload[i]['reward'])): + action = dataload[i]['action'][step] + print(action.dtype) + next_state, reward, done, _, info= env.step(action) + print("next state as per the model ", next_state) + print("next state as per the data ", dataload[i]['state'][step+1]) + try: + assert np.allclose(next_state, dataload[i]['state'][step+1]) + except: + print("the two states are not the same ", next_state, dataload[i]['state'][step+1]) + accumulator += dataload[i]['reward'][step] + accumulator2 += reward + if args.render: + env.render() + if args.use_sleep: + time.sleep(0.05) + time.sleep(0.1) + print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2)) + env.close() + + +if __name__ == '__main__': + + test() diff --git a/imperfect_envs/README.md b/imperfect_envs/README.md new file mode 100644 index 0000000..e69de29 diff --git a/imperfect_envs/driving/__init__.py b/imperfect_envs/driving/__init__.py new file mode 100644 index 0000000..24e6044 --- /dev/null +++ b/imperfect_envs/driving/__init__.py @@ -0,0 +1,9 @@ +from gym.envs.registration import register + +register(id="Continuous-v0", entry_point="driving.envs:GridworldContinuousEnv") +# register(id="ContinuousRandom-v0", entry_point="driving.envs:GridworldContinuousRandomInitEnv") +# register(id="ContinuousRandom1-v0", entry_point="driving.envs:GridworldContinuousRandomInitEnv1",max_episode_steps=400) +# register(id="ContinuousLeftRandom1-v0", entry_point="driving.envs:GridworldContinuousLeftRandomInitEnv1") +# register(id="ContinuousRightRandom1-v0", entry_point="driving.envs:GridworldContinuousRightRandomInitEnv1") +register(id="ContinuousFastRandom-v0", entry_point="driving.envs:GridworldContinuousFastRandomInitEnv",max_episode_steps=400) +register(id="ContinuousSlowRandom-v0", entry_point="driving.envs:GridworldContinuousSlowRandomInitEnv",max_episode_steps=400) diff --git a/imperfect_envs/driving/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/driving/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..09a0de3 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/__init__.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/agents.cpython-310.pyc b/imperfect_envs/driving/__pycache__/agents.cpython-310.pyc new file mode 100644 index 0000000..79617b3 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/agents.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/entities.cpython-310.pyc b/imperfect_envs/driving/__pycache__/entities.cpython-310.pyc new file mode 100644 index 0000000..3298097 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/entities.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/geometry.cpython-310.pyc b/imperfect_envs/driving/__pycache__/geometry.cpython-310.pyc new file mode 100644 index 0000000..8027f5f Binary files /dev/null and 
b/imperfect_envs/driving/__pycache__/geometry.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/graphics.cpython-310.pyc b/imperfect_envs/driving/__pycache__/graphics.cpython-310.pyc new file mode 100644 index 0000000..775b215 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/graphics.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/visualizer.cpython-310.pyc b/imperfect_envs/driving/__pycache__/visualizer.cpython-310.pyc new file mode 100644 index 0000000..7d3898f Binary files /dev/null and b/imperfect_envs/driving/__pycache__/visualizer.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/world.cpython-310.pyc b/imperfect_envs/driving/__pycache__/world.cpython-310.pyc new file mode 100644 index 0000000..0ffd222 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/world.cpython-310.pyc differ diff --git a/imperfect_envs/driving/agents.py b/imperfect_envs/driving/agents.py new file mode 100644 index 0000000..afd3c2b --- /dev/null +++ b/imperfect_envs/driving/agents.py @@ -0,0 +1,62 @@ +from driving.entities import RectangleEntity, CircleEntity +from driving.geometry import Point + +# For colors, we use tkinter colors. See http://www.science.smith.edu/dftwiki/index.php/Color_Charts_for_TKinter + + +class Car(RectangleEntity): + def __init__( + self, + center: Point, + heading: float, + color: str = "red", + min_acc: float = -4.0, + max_acc: float = 4.0, + ): + size = Point(2.0, 1.0) + movable = True + friction = 0.06 + super(Car, self).__init__( + center, heading, size, movable, friction, min_acc=min_acc, max_acc=max_acc + ) + self.color = color + self.collidable = True + + +class Pedestrian(CircleEntity): + def __init__(self, center: Point, heading: float, color: str = "LightSalmon2"): + radius = 0.4 + movable = True + friction = 0.2 + super(Pedestrian, self).__init__(center, heading, radius, movable, friction) + self.color = color + self.collidable = True + + +class Building(RectangleEntity): + def __init__(self, center: Point, size: Point, color: str = "gray26", heading=0.0): + movable = False + friction = 0.0 + super(Building, self).__init__(center, heading, size, movable, friction) + self.color = color + self.collidable = True + + +class Painting(RectangleEntity): + def __init__(self, center: Point, size: Point, color: str = "gray26"): + heading = 0.0 + movable = False + friction = 0.0 + super(Painting, self).__init__(center, heading, size, movable, friction) + self.color = color + self.collidable = False + + +class Goal(RectangleEntity): + def __init__(self, center: Point, radius: float, heading: float, color: str = "LightSalmon2"): + size = Point(radius, radius) + movable = True + friction = 0.2 + super(Goal, self).__init__(center, heading, size, movable, friction) + self.color = color + self.collidable = True diff --git a/imperfect_envs/driving/entities.py b/imperfect_envs/driving/entities.py new file mode 100644 index 0000000..aa58c7a --- /dev/null +++ b/imperfect_envs/driving/entities.py @@ -0,0 +1,241 @@ +import math +from typing import Text, Union +import numpy as np +from driving.geometry import Point, Rectangle, Circle +import copy + + +def get_entity_dynamics(friction, min_speed, max_speed, min_acc, max_acc, xnp=np): + # xnp: Either numpy or jax.numpy. 
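+    # The returned closure implements unicycle-style dynamics: steering
+    # induces an angular velocity proportional to speed, friction opposes
+    # acceleration, and position, heading, and speed are each advanced with
+    # a trapezoidal step (averaging the old and new derivatives over dt).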
+ + def entity_dynamics(x, u, dt): + # x: (x, y, xp, yp, theta, ang_vel, acceleration) + # u: (steering angle, acceleration) + center = x[:2] + velocity = x[2:4] + speed = xnp.linalg.norm(velocity, ord=2) + heading = x[4] + angular_velocity = x[5] + old_acceleration = x[6] + steering_angle = u[0] + acceleration = xnp.clip(u[1], min_acc, max_acc) + + new_angular_velocity = speed * steering_angle + new_acceleration = acceleration - friction * speed + + new_heading = heading + (angular_velocity + new_angular_velocity) * dt / 2.0 + new_speed = xnp.clip( + speed + (old_acceleration + new_acceleration) * dt / 2.0, min_speed, max_speed + ) + + next_speed = (speed + new_speed) / 2.0 + next_heading = (new_heading + heading) / 2.0 + new_velocity = next_speed * xnp.array((xnp.cos(next_heading), xnp.sin(next_heading))) + + new_center = center + (velocity + new_velocity) * dt / 2.0 + return xnp.concatenate( + ( + new_center, + new_velocity, + xnp.stack([new_heading, new_angular_velocity, new_acceleration]), + ) + ) + + return entity_dynamics + + +class Entity: + def __init__( + self, + center: Point, + heading: float, + movable: bool = True, + friction: float = 0.0, + min_speed: float = 0.0, + max_speed: float = math.inf, + min_acc: float = -math.inf, + max_acc: float = math.inf, + ): + self.center = center # this is x, y + self.heading = heading + self.movable = movable + self.color = "ghost white" + self.collidable = True + self.obj = None # MUST be set by subclasses. + if movable: + self.friction = friction + self.velocity = Point(0, 0) # this is xp, yp + self.acceleration = 0 # this is vp (or speedp) + self.angular_velocity = 0 # this is headingp + self.inputSteering = 0 + self.inputAcceleration = 0 + self.min_speed = min_speed + self.max_speed = max_speed + self.min_acc = min_acc + self.max_acc = max_acc + self.entity_dynamics = get_entity_dynamics( + friction, self.min_speed, self.max_speed, self.min_acc, self.max_acc, xnp=np + ) + + @property + def speed(self) -> float: + return self.velocity.norm(p=2) if self.movable else 0 + + def set_control(self, inputSteering: float, inputAcceleration: float): + self.inputSteering = inputSteering + self.inputAcceleration = inputAcceleration + + @property + def state(self): + return np.array( + ( + self.x, + self.y, + self.xp, + self.yp, + self.heading, + self.angular_velocity, + self.acceleration, + ) + ) + + @state.setter + def state(self, new_x): + self.center = Point(new_x[0], new_x[1]) + self.velocity = Point(new_x[2], new_x[3]) + self.heading = new_x[4] + self.angular_velocity = new_x[5] + self.acceleration = new_x[6] + self.buildGeometry() + + def tick(self, dt: float): + if self.movable: + x = self.state + u = np.array((self.inputSteering, self.inputAcceleration)) + new_x = self.entity_dynamics(x, u, dt) + self.state = new_x + + def buildGeometry(self): # builds the obj + raise NotImplementedError + + def collidesWith(self, other: Union["Point", "Entity"]) -> bool: + if isinstance(other, Entity): + return self.obj.intersectsWith(other.obj) + elif isinstance(other, Point): + return self.obj.intersectsWith(other) + else: + raise NotImplementedError + + def distanceTo(self, other: Union["Point", "Entity"]) -> float: + if isinstance(other, Entity): + return self.obj.distanceTo(other.obj) + elif isinstance(other, Point): + return self.obj.distanceTo(other) + else: + raise NotImplementedError + + def copy(self): + return copy.deepcopy(self) + + @property + def x(self): + return self.center.x + + @property + def y(self): + return self.center.y + + @property 
+ def xp(self): + return self.velocity.x + + @property + def yp(self): + return self.velocity.y + + +class RectangleEntity(Entity): + def __init__( + self, + center: Point, + heading: float, + size: Point, + movable: bool = True, + friction: float = 0, + **kwargs + ): + super(RectangleEntity, self).__init__(center, heading, movable, friction, **kwargs) + self.size = size + self.buildGeometry() + + @property + def edge_centers(self): + edge_centers = np.zeros((4, 2), dtype=np.float32) + x = self.center.x + y = self.center.y + w = self.size.x + h = self.size.y + edge_centers[0] = [ + x + w / 2.0 * np.cos(self.heading), + y + w / 2.0 * np.sin(self.heading), + ] + edge_centers[1] = [ + x - h / 2.0 * np.sin(self.heading), + y + h / 2.0 * np.cos(self.heading), + ] + edge_centers[2] = [ + x - w / 2.0 * np.cos(self.heading), + y - w / 2.0 * np.sin(self.heading), + ] + edge_centers[3] = [ + x + h / 2.0 * np.sin(self.heading), + y - h / 2.0 * np.cos(self.heading), + ] + return edge_centers + + @property + def corners(self): + ec = self.edge_centers + c = np.array([self.center.x, self.center.y]) + corners = [] + corners.append(Point(*(ec[1] + ec[0] - c))) + corners.append(Point(*(ec[2] + ec[1] - c))) + corners.append(Point(*(ec[3] + ec[2] - c))) + corners.append(Point(*(ec[0] + ec[3] - c))) + return corners + + def buildGeometry(self): + C = self.corners + self.obj = Rectangle(*C[:-1]) # pylint: disable=no-value-for-parameter + + #def distanceTo(self, other): + # return np.linalg.norm(np.array([self.center.x, self.center.y]) - np.array([other.center.x, other.center.y]), ord=1) + + +class CircleEntity(Entity): + def __init__( + self, + center: Point, + heading: float, + radius: float, + movable: bool = True, + friction: float = 0, + **kwargs + ): + super(CircleEntity, self).__init__(center, heading, movable, friction, **kwargs) + self.radius = radius + self.buildGeometry() + + def buildGeometry(self): + self.obj = Circle(self.center, self.radius) + + +class TextEntity(Entity): + def __init__(self, center: Point, **kwargs): + heading = 0 + super(TextEntity, self).__init__(center, heading, movable=False, **kwargs) + self.text = "" + + def buildGeometry(self): + # Represent text geometry as a tiny circle. Not accurate. 
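+        # The 0.01 radius effectively makes the label a point for distance
+        # and intersection queries.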
+ self.obj = Circle(self.center, 0.01) diff --git a/imperfect_envs/driving/envs/__init__.py b/imperfect_envs/driving/envs/__init__.py new file mode 100644 index 0000000..58a2929 --- /dev/null +++ b/imperfect_envs/driving/envs/__init__.py @@ -0,0 +1 @@ +from driving.envs.gridworld_continuous import GridworldContinuousEnv, GridworldContinuousSlowRandomInitEnv, GridworldContinuousFastRandomInitEnv, PidVelPolicy diff --git a/imperfect_envs/driving/envs/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/driving/envs/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..23e7d87 Binary files /dev/null and b/imperfect_envs/driving/envs/__pycache__/__init__.cpython-310.pyc differ diff --git a/imperfect_envs/driving/envs/__pycache__/gridworld_continuous.cpython-310.pyc b/imperfect_envs/driving/envs/__pycache__/gridworld_continuous.cpython-310.pyc new file mode 100644 index 0000000..e0f4083 Binary files /dev/null and b/imperfect_envs/driving/envs/__pycache__/gridworld_continuous.cpython-310.pyc differ diff --git a/imperfect_envs/driving/envs/gridworld_continuous.py b/imperfect_envs/driving/envs/gridworld_continuous.py new file mode 100644 index 0000000..eecd7f6 --- /dev/null +++ b/imperfect_envs/driving/envs/gridworld_continuous.py @@ -0,0 +1,260 @@ +import io +from typing import Text +import gym +from gym import spaces +from PIL import Image +import numpy as np +import scipy.special +from driving.world import World +from driving.entities import TextEntity, Entity +from driving.agents import Car, Building, Goal +from driving.geometry import Point +from typing import Tuple +import sys +import random + +class PidVelPolicy: + """PID controller for H that maintains its initial velocity.""" + + def __init__(self, dt: float, params: Tuple[float, float, float] = (3.0, 1.0, 6.0)): + self._target_vel = None + self.previous_error = 0 + self.integral = 0 + self.errors = [] + self.dt = dt + self.Kp, self.Ki, self.Kd = params + + def action(self, obs): + my_y_dot = obs[3] + if self._target_vel is None: + self._target_vel = my_y_dot + error = self._target_vel - my_y_dot + derivative = (error - self.previous_error) * self.dt + self.integral = self.integral + self.dt * error + acc = self.Kp * error + self.Ki * self.integral + self.Kd * derivative + self.previous_error = error + self.errors.append(error) + return acc + + def reset(self, seed = None): + if seed is not None: + random.seed(seed) + self._target_vel = None + self.previous_error = 0 + self.integral = 0 + self.errors = [] + + def __str__(self): + return "PidVelPolicy({})".format(self.dt) + +class GridworldContinuousEnv(gym.Env): + + def __init__(self, + dt: float = 0.1, + width: int = 30, + height: int = 40, + time_limit: float = 300.0): + super(GridworldContinuousEnv, self).__init__() + self.dt = dt + self.width = width + self.height = height + self.world = World(self.dt, width=width, height=height, ppm=6) + self.accelerate = PidVelPolicy(self.dt) + self.step_num = 0 + self.time_limit = time_limit + self.action_space = spaces.Box( + np.array([-1.]), np.array([1.]), dtype=np.float32 + ) + self.goal_radius = 2. + self.observation_space = spaces.Box(-np.inf, np.inf, shape=(14,)) + self.start = np.array([self.width/2.,self.goal_radius]) + self.goal = np.array([self.width/2., self.height-self.goal_radius]) + self.max_dist = np.linalg.norm(self.goal-self.start,2) + + self.target = [self.height/5., self.height*2./5., self.height*3./5., self.height*4./5., np.inf] + self.obstacle_width = 6. + self.initial_speed = 3. 
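+    # step() takes a 1-D steering command in [-1, 1] (scaled by 0.1 below);
+    # longitudinal acceleration comes from the PidVelPolicy speed controller.
+    # Episodes end when the car leaves the arena (reward -10000), reaches the
+    # goal, or exceeds time_limit, and step() returns the 5-tuple
+    # (obs, reward, done, truncated, info).
+    # Note: PidVelPolicy multiplies its derivative term by dt rather than
+    # dividing by it, so the effective derivative gain absorbs a factor of
+    # dt**2 relative to a textbook discrete PID update.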
+ + def step(self, action: np.ndarray, verbose: bool = False): + self.step_num += 1 + # for i in range(len(action)): + # action[i] = action[i]*0.1 + action = action * 0.1 + # print(action) + car = self.world.dynamic_agents[0] + goal_loc = self.world.dynamic_agents[1] + acc = self.accelerate.action(self._get_obs()) + action = np.append(action, acc) + if self.stop: + action = np.array([0, -5]) + # print(type(car)) + car.set_control(*action) + goal_loc.set_control(0, 0) + self.world.tick() + + reward = self.reward(verbose) + + done = False + if car.y >= self.height or car.y <= 0 or car.x <= 0 or car.x >= self.width: + reward -= 10000 + done = True + if self.step_num >= self.time_limit: + done = True + if self.car.collidesWith(self.goal_obj): + done = True + self.stop = True + #if self.step_num < 6: + # done = False + return self._get_obs(), reward, done, True, {'episode': {'r': reward, 'l': self.step_num}} + + def reset(self, seed = None): + self.world.reset() + self.stop = False + self.target_count = 0 + if seed is not None: + random.seed(seed) + self.buildings = [ + Building(Point(self.width/2., self.height/2.-3), Point(self.obstacle_width,1), "gray80"), + ] + random_dis = random.random()*2. + random_angle = random.random()*2*np.pi + init_x = self.start[0] + random_dis*np.cos(random_angle) + init_y = self.start[1] + random_dis*np.sin(random_angle) + self.car = Car(Point(init_x, init_y), np.pi/2., "blue") + self.car.velocity = Point(0, self.initial_speed) + + self.goal_obj = Goal(Point(self.goal[0], self.goal[1]), self.goal_radius, 0.0) + + for building in self.buildings: + self.world.add(building) + self.world.add(self.car) + self.world.add(self.goal_obj) + + self.last_heading = np.pi / 2 + + self.step_num = 0 + return self._get_obs() + + def reset_with_obs(self, obs): + self.world.reset() + self.stop = False + self.target_count = 0 + + self.buildings = [ + Building(Point(self.width/2., self.height/2.-3), Point(self.obstacle_width,1), "gray80"), + ] + + init_x = (obs[0]/2.+0.5)*self.width + init_y = (obs[1]/2.+0.5)*self.height + self.car = Car(Point(init_x, init_y), np.pi/2., "blue") + self.car.velocity = Point(0, self.initial_speed) + + self.goal_obj = Goal(Point(self.goal[0], self.goal[1]), self.goal_radius, 0.0) + + for building in self.buildings: + self.world.add(building) + self.world.add(self.car) + self.world.add(self.goal_obj) + + self.last_heading = np.pi / 2 + + self.step_num = 0 + return self._get_obs() + + def _get_obs(self): + """ + Get state of car + """ + return_state = np.array(self.world.state) + #print(return_state) + return_state[1] = 2.* ((return_state[1] / self.height) - 0.5) + return_state[0] = 2.* ((return_state[0] / self.width) - 0.5) + return_state[2] /= self.initial_speed + return_state[3] /= self.initial_speed + # print("get_obs return state ", return_state) + return return_state + + def inverse_dynamic(self, state, next_state): + return (next_state[-2] / np.linalg.norm(self.initial_speed*state[2:4], ord=2))/self.dt + + def reward(self, verbose, weight=10.0): + dist_rew = -1. # * (self.car.center.distanceTo(self.goal_obj)/self.max_dist) + coll_rew = 0 + for building in self.buildings: + if self.car.collidesWith(building): + coll_rew = -1000. + break + + goal_rew = 0.0 + if self.car.collidesWith(self.goal_obj) and (not self.stop): + goal_rew = 100. + + extra_rew = 0. + #if self.car.x < self.width / 4.: + # extra_rew = (self.width / 4. - self.car.x)/(self.width/4.) * (-1.) + #elif self.car.x > self.width * 3. / 4.: + # extra_rew = (self.car.x-self.width * 3. 
/ 4.)/(self.width/4.) * (-1.) + + reward = sum([dist_rew, coll_rew, extra_rew, goal_rew]) + if verbose: print("dist reward: ", dist_rew, + "goal reward: ", goal_rew, + "extra reward: ", extra_rew, + "reward: ", reward) + return reward + + def render(self): + self.world.render() + + def set_goal(self, x, y): + self.goal = np.array([x, y]) + +class GridworldContinuousSlowRandomInitEnv(GridworldContinuousEnv): + def reset(self, seed = None): + + if seed is not None: + random.seed(seed) + + self.world.reset() + + self.stop = False + self.target_count = 0 + + self.buildings = [ + Building(Point(self.width/2., self.height/2.-3), Point(self.obstacle_width,1), "gray80"), + ] + + while True: + random_w = random.random() + random_h = random.random() + init_x = self.width/2.-(self.obstacle_width/2.+2.) + random_w*(self.obstacle_width+4.) + init_y = self.goal_radius + (self.height-3*self.goal_radius)*random_h + cond1 = abs(init_x - self.width/2.) < (self.obstacle_width/2.+2.) and init_y-self.height/2. < 3. and init_y-self.height/2.>-13. + slope = ((self.height - self.goal_radius) - (self.height/2.-3))/(self.width/4.) + #print(slope, init_x, ((self.width/4.-abs(init_x - self.width/2.)) * slope + (self.height/2.-3.))) + cond2 = init_y < ((self.width/4.-abs(init_x - self.width/2.)) * slope + (self.height/2.-3.)) + if cond2 and not cond1: + break + init_heading = np.pi/2. # np.arctan2(self.goal[1] - init_y, self.goal[0]-init_x) + self.car = Car(Point(init_x, init_y), init_heading, "blue") + self.car.velocity = Point(0, self.initial_speed) + + self.goal_obj = Goal(Point(self.goal[0], self.goal[1]), self.goal_radius, 0.0) + + for building in self.buildings: + self.world.add(building) + self.world.add(self.car) + self.world.add(self.goal_obj) + + self.last_heading = np.pi / 2 + + self.step_num = 0 + return self._get_obs() + +class GridworldContinuousFastRandomInitEnv(GridworldContinuousSlowRandomInitEnv): + def __init__(self, + dt: float = 0.1, + width: int = 30, + height: int = 40, + time_limit: float = 300.0): + super(GridworldContinuousFastRandomInitEnv, self).__init__(dt, width, height, time_limit) + self.initial_speed = 9. 
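For reference, a minimal rollout sketch for the environments registered above. Assumptions: the `driving` package is importable so the gym IDs resolve, the gym version matches the one used by demo_code.py (reset() returns only the observation, step() returns a 5-tuple), and a random action stands in for the trained PPO policy:

    import gym
    import driving  # noqa: F401 -- registers ContinuousFastRandom-v0 / ContinuousSlowRandom-v0

    env = gym.make("ContinuousFastRandom-v0")
    env.set_goal(15, 38)                    # goal used by the demo_code.py defaults
    obs = env.reset()
    done, ep_reward = False, 0.0
    while not done:
        action = env.action_space.sample()  # placeholder for ppo_agent.select_action(obs)
        obs, reward, done, truncated, info = env.step(action)
        ep_reward += reward
    env.close()
    print("episode reward:", round(ep_reward, 2))
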
diff --git a/imperfect_envs/driving/geometry.py b/imperfect_envs/driving/geometry.py new file mode 100644 index 0000000..196b0fa --- /dev/null +++ b/imperfect_envs/driving/geometry.py @@ -0,0 +1,243 @@ +import numpy as np +from typing import Union + + +class Point: + def __init__(self, x: float, y: float): + self.x = float(x) + self.y = float(y) + + def __str__(self): + return "Point(" + str(self.x) + ", " + str(self.y) + ")" + + def __add__(self, other: "Point") -> "Point": + return Point(self.x + other.x, self.y + other.y) + + def __sub__(self, other: "Point") -> "Point": + return Point(self.x - other.x, self.y - other.y) + + def norm(self, p: int = 2) -> float: + return (self.x ** p + self.y ** p) ** (1.0 / p) + + def dot(self, other: "Point") -> float: + return self.x * other.x + self.y * other.y + + def __mul__(self, other: float) -> "Point": + return Point(other * self.x, other * self.y) + + def __rmul__(self, other: float) -> "Point": + return self.__mul__(other) + + def __truediv__(self, other: float) -> "Point": + return self.__mul__(1.0 / other) + + def isInside(self, other: Union["Line", "Rectangle", "Circle"]) -> bool: + if isinstance(other, Line): + AM = Line(other.p1, self) + MB = Line(self, other.p2) + return np.isclose(np.abs(AM.dot(MB)), AM.length * MB.length) + + elif isinstance(other, Rectangle): + # Based on https://stackoverflow.com/a/2763387 + AB = Line(other.c1, other.c2) + AM = Line(other.c1, self) + BC = Line(other.c2, other.c3) + BM = Line(other.c2, self) + + return 0 <= AB.dot(AM) <= AB.dot(AB) and 0 <= BC.dot(BM) <= BC.dot(BC) + + elif isinstance(other, Circle): + return (self - other.m).norm(p=2) <= other.r + + raise NotImplementedError + + def distanceTo(self, other: Union["Point", "Line", "Rectangle", "Circle"]) -> float: + if isinstance(other, Point): + return (self - other).norm(p=2) + + elif isinstance(other, Line): + # Based on https://math.stackexchange.com/a/330329 + s2_minus_s1 = other.p2 - other.p1 + that = (self - other.p1).dot(s2_minus_s1) / s2_minus_s1.dot(s2_minus_s1) + tstar = np.minimum(1, np.maximum(0, that)) + return (other.p1 + tstar * s2_minus_s1 - self).norm(p=2) + + elif isinstance(other, Rectangle): + if self.isInside(other): + return 0 + E = other.edges + return np.min([self.distanceTo(e) for e in E]) + + elif isinstance(other, Circle): + return np.maximum(0, self.distanceTo(other.m) - other.r) + + else: + try: + return other.distanceTo(self) + except NameError: + raise NotImplementedError + print("Something went wrong!") + + +def onSegment(p: Point, q: Point, r: Point) -> bool: + """ + Given three colinear points p, q, r, the function checks if + point q lies on line segment 'pr' + """ + return ( + q.x <= np.maximum(p.x, r.x) + and q.x >= np.minimum(p.x, r.x) + and q.y <= np.maximum(p.y, r.y) + and q.y >= np.minimum(p.y, r.y) + ) + + +def orientation(p: Point, q: Point, r: Point) -> int: + """ + To find orientation of ordered triplet (p, q, r). + The function returns following values + 0 --> p, q and r are colinear + 1 --> Clockwise + 2 --> Counterclockwise + """ + # See https://www.geeksforgeeks.org/orientation-3-ordered-points/ for details of below formula. 
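+    # val is the negative z-component of the 2-D cross product (q - p) x (r - q):
+    # val > 0 means a clockwise turn, val < 0 counter-clockwise, 0 collinear.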
+ val = (q.y - p.y) * (r.x - q.x) - (q.x - p.x) * (r.y - q.y) + if val == 0: + return 0 # colinear + return 1 if val > 0 else 2 # clock or counterclock wise + + +class Line: + def __init__(self, p1: Point, p2: Point): + self.p1 = p1 + self.p2 = p2 + + def __str__(self): + return "Line(" + str(self.p1) + ", " + str(self.p2) + ")" + + def intersectsWith(self, other: Union["Line", "Rectangle", "Circle"]): + if isinstance(other, Line): + p1 = self.p1 + q1 = self.p2 + p2 = other.p1 + q2 = other.p2 + + # Based on https://www.geeksforgeeks.org/check-if-two-given-line-segments-intersect/ + # Find the four orientations needed for general and special cases + o1 = orientation(p1, q1, p2) + o2 = orientation(p1, q1, q2) + o3 = orientation(p2, q2, p1) + o4 = orientation(p2, q2, q1) + + # General case + if o1 != o2 and o3 != o4: + return True + + # Special Cases + # p1, q1 and p2 are colinear and p2 lies on segment p1q1 + if o1 == 0 and onSegment(p1, p2, q1): + return True + + # p1, q1 and q2 are colinear and q2 lies on segment p1q1 + if o2 == 0 and onSegment(p1, q2, q1): + return True + + # p2, q2 and p1 are colinear and p1 lies on segment p2q2 + if o3 == 0 and onSegment(p2, p1, q2): + return True + + # p2, q2 and q1 are colinear and q1 lies on segment p2q2 + if o4 == 0 and onSegment(p2, q1, q2): + return True + + return False # Doesn't fall in any of the above cases + + elif isinstance(other, Rectangle): + if self.p1.isInside(other) or self.p2.isInside(other): + return True + E = other.edges + for edge in E: + if self.intersectsWith(edge): + return True + return False + + elif isinstance(other, Circle): + return other.m.distanceTo(self) <= other.r + + raise NotImplementedError + + @property + def length(self): + return self.p1.distanceTo(self.p2) + + def dot(self, other: "Line") -> float: # assumes Line is a vector from p1 to p2 + v1 = self.p2 - self.p1 + v2 = other.p2 - other.p1 + return v1.dot(v2) + + def distanceTo(self, other: "Point") -> float: + if isinstance(other, Point): + return other.distanceTo(self) + + +class Rectangle: + # 3 points are enough to represent a rectangle + def __init__(self, c1: Point, c2: Point, c3: Point): + self.c1 = c1 + self.c2 = c2 + self.c3 = c3 + self.c4 = c3 + c1 - c2 + + def __str__(self): + return "Rectangle({}, {}, {}, {})".format(self.c1, self.c2, self.c3, self.c4) + + @property + def edges(self): + e1 = Line(self.c1, self.c2) + e2 = Line(self.c2, self.c3) + e3 = Line(self.c3, self.c4) + e4 = Line(self.c4, self.c1) + return [e1, e2, e3, e4] + + @property + def corners(self): + return [self.c1, self.c2, self.c3, self.c4] + + def intersectsWith(self, other: Union["Line", "Rectangle", "Circle"]) -> bool: + if isinstance(other, Line): + return other.intersectsWith(self) + + elif isinstance(other, Rectangle) or isinstance(other, Circle): + E = self.edges + for e in E: + if e.intersectsWith(other): + return True + return False + + raise NotImplementedError + + def distanceTo(self, other: "Point") -> float: + if isinstance(other, Point): + return other.distanceTo(self) + + +class Circle: + def __init__(self, m: Point, r: float): + self.m = m + self.r = r + + def __str__(self): + return "Circle(" + str(self.m) + ", radius = " + str(self.r) + ")" + + def intersectsWith(self, other: Union["Line", "Rectangle", "Circle"]): + if isinstance(other, Line) or isinstance(other, Rectangle): + return other.intersectsWith(self) + + elif isinstance(other, Circle): + return self.m.distanceTo(other.m) <= self.r + other.r + + raise NotImplementedError + + def distanceTo(self, other: 
"Point") -> float: + if isinstance(other, Point): + return other.distanceTo(self) diff --git a/imperfect_envs/driving/graphics.py b/imperfect_envs/driving/graphics.py new file mode 100644 index 0000000..4207252 --- /dev/null +++ b/imperfect_envs/driving/graphics.py @@ -0,0 +1,904 @@ +# graphics.py +"""Simple object oriented graphics library + +The library is designed to make it very easy for novice programmers to +experiment with computer graphics in an object oriented fashion. It is +written by John Zelle for use with the book "Python Programming: An +Introduction to Computer Science" (Franklin, Beedle & Associates). + +LICENSE: This is open-source software released under the terms of the +GPL (http://www.gnu.org/licenses/gpl.html). + +PLATFORMS: The package is a wrapper around Tkinter and should run on +any platform where Tkinter is available. + +INSTALLATION: Put this file somewhere where Python can see it. + +OVERVIEW: There are two kinds of objects in the library. The GraphWin +class implements a window where drawing can be done and various +GraphicsObjects are provided that can be drawn into a GraphWin. As a +simple example, here is a complete program to draw a circle of radius +10 centered in a 100x100 window: + +-------------------------------------------------------------------- +from graphics import * + +def main(): + win = GraphWin("My Circle", 100, 100) + c = Circle(Point(50,50), 10) + c.draw(win) + win.getMouse() # Pause to view result + win.close() # Close window when done + +main() +-------------------------------------------------------------------- +GraphWin objects support coordinate transformation through the +setCoords method and pointer-based input through getMouse. + +The library provides the following graphical objects: + Point + Line + Circle + Oval + Rectangle + Polygon + Text + Entry (for text-based input) + Image + +Various attributes of graphical objects can be set such as +outline-color, fill-color and line-width. Graphical objects also +support moving and hiding for animation effects. + +The library also provides a very simple class for pixel-based image +manipulation, Pixmap. A pixmap can be loaded from a file and displayed +using an Image object. Both getPixel and setPixel methods are provided +for manipulating the image. + +DOCUMENTATION: For complete documentation, see Chapter 4 of "Python +Programming: An Introduction to Computer Science" by John Zelle, +published by Franklin, Beedle & Associates. Also see +http://mcsp.wartburg.edu/zelle/python for a quick reference""" + +# Version 4.2 5/26/2011 +# * Modified Image to allow multiple undraws like other GraphicsObjects +# Version 4.1 12/29/2009 +# * Merged Pixmap and Image class. Old Pixmap removed, use Image. +# Version 4.0.1 10/08/2009 +# * Modified the autoflush on GraphWin to default to True +# * Autoflush check on close, setBackground +# * Fixed getMouse to flush pending clicks at entry +# Version 4.0 08/2009 +# * Reverted to non-threaded version. The advantages (robustness, +# efficiency, ability to use with other Tk code, etc.) outweigh +# the disadvantage that interactive use with IDLE is slightly more +# cumbersome. +# * Modified to run in either Python 2.x or 3.x (same file). +# * Added Image.getPixmap() +# * Added update() -- stand alone function to cause any pending +# graphics changes to display. +# +# Version 3.4 10/16/07 +# Fixed GraphicsError to avoid "exploded" error messages. 
+# Version 3.3 8/8/06 +# Added checkMouse method to GraphWin +# Version 3.2.3 +# Fixed error in Polygon init spotted by Andrew Harrington +# Fixed improper threading in Image constructor +# Version 3.2.2 5/30/05 +# Cleaned up handling of exceptions in Tk thread. The graphics package +# now raises an exception if attempt is made to communicate with +# a dead Tk thread. +# Version 3.2.1 5/22/05 +# Added shutdown function for tk thread to eliminate race-condition +# error "chatter" when main thread terminates +# Renamed various private globals with _ +# Version 3.2 5/4/05 +# Added Pixmap object for simple image manipulation. +# Version 3.1 4/13/05 +# Improved the Tk thread communication so that most Tk calls +# do not have to wait for synchonization with the Tk thread. +# (see _tkCall and _tkExec) +# Version 3.0 12/30/04 +# Implemented Tk event loop in separate thread. Should now work +# interactively with IDLE. Undocumented autoflush feature is +# no longer necessary. Its default is now False (off). It may +# be removed in a future version. +# Better handling of errors regarding operations on windows that +# have been closed. +# Addition of an isClosed method to GraphWindow class. + +# Version 2.2 8/26/04 +# Fixed cloning bug reported by Joseph Oldham. +# Now implements deep copy of config info. +# Version 2.1 1/15/04 +# Added autoflush option to GraphWin. When True (default) updates on +# the window are done after each action. This makes some graphics +# intensive programs sluggish. Turning off autoflush causes updates +# to happen during idle periods or when flush is called. +# Version 2.0 +# Updated Documentation +# Made Polygon accept a list of Points in constructor +# Made all drawing functions call TK update for easier animations +# and to make the overall package work better with +# Python 2.3 and IDLE 1.0 under Windows (still some issues). +# Removed vestigial turtle graphics. +# Added ability to configure font for Entry objects (analogous to Text) +# Added setTextColor for Text as an alias of setFill +# Changed to class-style exceptions +# Fixed cloning of Text objects + +# Version 1.6 +# Fixed Entry so StringVar uses _root as master, solves weird +# interaction with shell in Idle +# Fixed bug in setCoords. X and Y coordinates can increase in +# "non-intuitive" direction. +# Tweaked wm_protocol so window is not resizable and kill box closes. + +# Version 1.5 +# Fixed bug in Entry. Can now define entry before creating a +# GraphWin. All GraphWins are now toplevel windows and share +# a fixed root (called _root). + +# Version 1.4 +# Fixed Garbage collection of Tkinter images bug. +# Added ability to set text atttributes. +# Added Entry boxes. + +import time, os, sys + +try: # import as appropriate for 2.x vs. 
3.x
+    import tkinter as tk
+except:
+    import Tkinter as tk
+
+
+##########################################################################
+# Module Exceptions
+
+
+class GraphicsError(Exception):
+    """Generic error class for graphics module exceptions."""
+
+    pass
+
+
+OBJ_ALREADY_DRAWN = "Object currently drawn"
+UNSUPPORTED_METHOD = "Object doesn't support operation"
+BAD_OPTION = "Illegal option value"
+DEAD_THREAD = "Graphics thread quit unexpectedly"
+
+try:
+    _root = tk.Tk()
+    _root.withdraw()
+except:
+    _root = None
+
+
+def update():
+    _root.update()
+
+
+############################################################################
+# Graphics classes start here
+
+
+class GraphWin(tk.Canvas):
+
+    """A GraphWin is a toplevel window for displaying graphics."""
+
+    def __init__(self, title="Graphics Window", width=200, height=200, autoflush=True):
+        master = tk.Toplevel(_root)
+        master.protocol("WM_DELETE_WINDOW", self.close)
+        tk.Canvas.__init__(self, master, width=width, height=height)
+        self.master.title(title)
+        self.pack()
+        master.resizable(0, 0)
+        self.foreground = "black"
+        self.items = []
+        self.mouseX = None
+        self.mouseY = None
+        self.bind("<Button-1>", self._onClick)
+        self.height = height
+        self.width = width
+        self.autoflush = autoflush
+        self._mouseCallback = None
+        self.trans = None
+        self.closed = False
+        master.lift()
+        if autoflush:
+            _root.update()
+
+    def __checkOpen(self):
+        if self.closed:
+            raise GraphicsError("window is closed")
+
+    def setBackground(self, color):
+        """Set background color of the window"""
+        self.__checkOpen()
+        self.config(bg=color)
+        self.__autoflush()
+
+    def setCoords(self, x1, y1, x2, y2):
+        """Set coordinates of window to run from (x1,y1) in the
+        lower-left corner to (x2,y2) in the upper-right corner."""
+        self.trans = Transform(self.width, self.height, x1, y1, x2, y2)
+
+    def close(self):
+        """Close the window"""
+
+        if self.closed:
+            return
+        self.closed = True
+        self.master.destroy()
+        self.__autoflush()
+
+    def isClosed(self):
+        return self.closed
+
+    def isOpen(self):
+        return not self.closed
+
+    def __autoflush(self):
+        if self.autoflush:
+            _root.update()
+
+    def plot(self, x, y, color="black"):
+        """Set pixel (x,y) to the given color"""
+        self.__checkOpen()
+        xs, ys = self.toScreen(x, y)
+        self.create_line(xs, ys, xs + 1, ys, fill=color)
+        self.__autoflush()
+
+    def plotPixel(self, x, y, color="black"):
+        """Set pixel raw (independent of window coordinates) pixel
+        (x,y) to color"""
+        self.__checkOpen()
+        self.create_line(x, y, x + 1, y, fill=color)
+        self.__autoflush()
+
+    def flush(self):
+        """Update drawing to the window"""
+        self.__checkOpen()
+        self.update_idletasks()
+
+    def getMouse(self):
+        """Wait for mouse click and return Point object representing
+        the click"""
+        self.update()  # flush any prior clicks
+        self.mouseX = None
+        self.mouseY = None
+        while self.mouseX == None or self.mouseY == None:
+            self.update()
+            if self.isClosed():
+                raise GraphicsError("getMouse in closed window")
+            time.sleep(0.1)  # give up thread
+        x, y = self.toWorld(self.mouseX, self.mouseY)
+        self.mouseX = None
+        self.mouseY = None
+        return Point(x, y)
+
+    def checkMouse(self):
+        """Return last mouse click or None if mouse has
+        not been clicked since last call"""
+        if self.isClosed():
+            raise GraphicsError("checkMouse in closed window")
+        self.update()
+        if self.mouseX != None and self.mouseY != None:
+            x, y = self.toWorld(self.mouseX, self.mouseY)
+            self.mouseX = None
+            self.mouseY = None
+            return Point(x, y)
+        else:
+            return None
+
+    def 
getHeight(self): + """Return the height of the window""" + return self.height + + def getWidth(self): + """Return the width of the window""" + return self.width + + def toScreen(self, x, y): + trans = self.trans + if trans: + return self.trans.screen(x, y) + else: + return x, y + + def toWorld(self, x, y): + trans = self.trans + if trans: + return self.trans.world(x, y) + else: + return x, y + + def setMouseHandler(self, func): + self._mouseCallback = func + + def _onClick(self, e): + self.mouseX = e.x + self.mouseY = e.y + if self._mouseCallback: + self._mouseCallback(Point(e.x, e.y)) + + +class Transform: + + """Internal class for 2-D coordinate transformations""" + + def __init__(self, w, h, xlow, ylow, xhigh, yhigh): + # w, h are width and height of window + # (xlow,ylow) coordinates of lower-left [raw (0,h-1)] + # (xhigh,yhigh) coordinates of upper-right [raw (w-1,0)] + xspan = xhigh - xlow + yspan = yhigh - ylow + self.xbase = xlow + self.ybase = yhigh + self.xscale = xspan / float(w - 1) + self.yscale = yspan / float(h - 1) + + def screen(self, x, y): + # Returns x,y in screen (actually window) coordinates + xs = (x - self.xbase) / self.xscale + ys = (self.ybase - y) / self.yscale + return int(xs + 0.5), int(ys + 0.5) + + def world(self, xs, ys): + # Returns xs,ys in world coordinates + x = xs * self.xscale + self.xbase + y = self.ybase - ys * self.yscale + return x, y + + +# Default values for various item configuration options. Only a subset of +# keys may be present in the configuration dictionary for a given item +DEFAULT_CONFIG = { + "fill": "", + "outline": "black", + "width": "1", + "arrow": "none", + "text": "", + "justify": "center", + "font": ("helvetica", 12, "normal"), +} + + +class GraphicsObject: + + """Generic base class for all of the drawable objects""" + + # A subclass of GraphicsObject should override _draw and + # and _move methods. + + def __init__(self, options): + # options is a list of strings indicating which options are + # legal for this object. + + # When an object is drawn, canvas is set to the GraphWin(canvas) + # object where it is drawn and id is the TK identifier of the + # drawn shape. + self.canvas = None + self.id = None + + # config is the dictionary of configuration options for the widget. + config = {} + for option in options: + config[option] = DEFAULT_CONFIG[option] + self.config = config + + def setFill(self, color): + """Set interior color to color""" + self._reconfig("fill", color) + + def setOutline(self, color): + """Set outline color to color""" + self._reconfig("outline", color) + + def setWidth(self, width): + """Set line weight to width""" + self._reconfig("width", width) + + def draw(self, graphwin): + + """Draw the object in graphwin, which should be a GraphWin + object. A GraphicsObject may only be drawn into one + window. Raises an error if attempt made to draw an object that + is already visible.""" + + if self.canvas and not self.canvas.isClosed(): + raise GraphicsError(OBJ_ALREADY_DRAWN) + if graphwin.isClosed(): + raise GraphicsError("Can't draw to closed window") + self.canvas = graphwin + self.id = self._draw(graphwin, self.config) + if graphwin.autoflush: + _root.update() + + def undraw(self): + + """Undraw the object (i.e. hide it). 
Returns silently if the + object is not currently drawn.""" + + if not self.canvas: + return + if not self.canvas.isClosed(): + self.canvas.delete(self.id) + if self.canvas.autoflush: + _root.update() + self.canvas = None + self.id = None + + def move(self, dx, dy): + + """move object dx units in x direction and dy units in y + direction""" + + self._move(dx, dy) + canvas = self.canvas + if canvas and not canvas.isClosed(): + trans = canvas.trans + if trans: + x = dx / trans.xscale + y = -dy / trans.yscale + else: + x = dx + y = dy + self.canvas.move(self.id, x, y) + if canvas.autoflush: + _root.update() + + def _reconfig(self, option, setting): + # Internal method for changing configuration of the object + # Raises an error if the option does not exist in the config + # dictionary for this object + if option not in self.config: + raise GraphicsError(UNSUPPORTED_METHOD) + options = self.config + options[option] = setting + if self.canvas and not self.canvas.isClosed(): + self.canvas.itemconfig(self.id, options) + if self.canvas.autoflush: + _root.update() + + def _draw(self, canvas, options): + """draws appropriate figure on canvas with options provided + Returns Tk id of item drawn""" + pass # must override in subclass + + def _move(self, dx, dy): + """updates internal state of object to move it dx,dy units""" + pass # must override in subclass + + +class Point(GraphicsObject): + def __init__(self, x, y): + GraphicsObject.__init__(self, ["outline", "fill"]) + self.setFill = self.setOutline + self.x = x + self.y = y + + def _draw(self, canvas, options): + x, y = canvas.toScreen(self.x, self.y) + return canvas.create_rectangle(x, y, x + 1, y + 1, options) + + def _move(self, dx, dy): + self.x = self.x + dx + self.y = self.y + dy + + def clone(self): + other = Point(self.x, self.y) + other.config = self.config.copy() + return other + + def getX(self): + return self.x + + def getY(self): + return self.y + + +class _BBox(GraphicsObject): + # Internal base class for objects represented by bounding box + # (opposite corners) Line segment is a degenerate case. 
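+    # Rectangle, Oval, and Line subclass _BBox; it centralizes the two corner
+    # points, movement, and center computation, while each subclass supplies
+    # its own _draw.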
+ + def __init__(self, p1, p2, options=["outline", "width", "fill"]): + GraphicsObject.__init__(self, options) + self.p1 = p1.clone() + self.p2 = p2.clone() + + def _move(self, dx, dy): + self.p1.x = self.p1.x + dx + self.p1.y = self.p1.y + dy + self.p2.x = self.p2.x + dx + self.p2.y = self.p2.y + dy + + def getP1(self): + return self.p1.clone() + + def getP2(self): + return self.p2.clone() + + def getCenter(self): + p1 = self.p1 + p2 = self.p2 + return Point((p1.x + p2.x) / 2.0, (p1.y + p2.y) / 2.0) + + +class Rectangle(_BBox): + def __init__(self, p1, p2): + _BBox.__init__(self, p1, p2) + + def _draw(self, canvas, options): + p1 = self.p1 + p2 = self.p2 + x1, y1 = canvas.toScreen(p1.x, p1.y) + x2, y2 = canvas.toScreen(p2.x, p2.y) + return canvas.create_rectangle(x1, y1, x2, y2, options) + + def clone(self): + other = Rectangle(self.p1, self.p2) + other.config = self.config.copy() + return other + + +class Oval(_BBox): + def __init__(self, p1, p2): + _BBox.__init__(self, p1, p2) + + def clone(self): + other = Oval(self.p1, self.p2) + other.config = self.config.copy() + return other + + def _draw(self, canvas, options): + p1 = self.p1 + p2 = self.p2 + x1, y1 = canvas.toScreen(p1.x, p1.y) + x2, y2 = canvas.toScreen(p2.x, p2.y) + return canvas.create_oval(x1, y1, x2, y2, options) + + +class Circle(Oval): + def __init__(self, center, radius): + p1 = Point(center.x - radius, center.y - radius) + p2 = Point(center.x + radius, center.y + radius) + Oval.__init__(self, p1, p2) + self.radius = radius + + def clone(self): + other = Circle(self.getCenter(), self.radius) + other.config = self.config.copy() + return other + + def getRadius(self): + return self.radius + + +class Line(_BBox): + def __init__(self, p1, p2): + _BBox.__init__(self, p1, p2, ["arrow", "fill", "width"]) + self.setFill(DEFAULT_CONFIG["outline"]) + self.setOutline = self.setFill + + def clone(self): + other = Line(self.p1, self.p2) + other.config = self.config.copy() + return other + + def _draw(self, canvas, options): + p1 = self.p1 + p2 = self.p2 + x1, y1 = canvas.toScreen(p1.x, p1.y) + x2, y2 = canvas.toScreen(p2.x, p2.y) + return canvas.create_line(x1, y1, x2, y2, options) + + def setArrow(self, option): + if not option in ["first", "last", "both", "none"]: + raise GraphicsError(BAD_OPTION) + self._reconfig("arrow", option) + + +class Polygon(GraphicsObject): + def __init__(self, *points): + # if points passed as a list, extract it + if len(points) == 1 and type(points[0]) == type([]): + points = points[0] + self.points = list(map(Point.clone, points)) + GraphicsObject.__init__(self, ["outline", "width", "fill"]) + + def clone(self): + other = Polygon(*self.points) + other.config = self.config.copy() + return other + + def getPoints(self): + return list(map(Point.clone, self.points)) + + def _move(self, dx, dy): + for p in self.points: + p.move(dx, dy) + + def _draw(self, canvas, options): + args = [canvas] + for p in self.points: + x, y = canvas.toScreen(p.x, p.y) + args.append(x) + args.append(y) + args.append(options) + return GraphWin.create_polygon(*args) + + +class Text(GraphicsObject): + def __init__(self, p, text): + GraphicsObject.__init__(self, ["justify", "fill", "text", "font"]) + self.setText(text) + self.anchor = p.clone() + self.setFill(DEFAULT_CONFIG["outline"]) + self.setOutline = self.setFill + + def _draw(self, canvas, options): + p = self.anchor + x, y = canvas.toScreen(p.x, p.y) + return canvas.create_text(x, y, options) + + def _move(self, dx, dy): + self.anchor.move(dx, dy) + + def clone(self): + other 
= Text(self.anchor, self.config["text"]) + other.config = self.config.copy() + return other + + def setText(self, text): + self._reconfig("text", text) + + def getText(self): + return self.config["text"] + + def getAnchor(self): + return self.anchor.clone() + + def setFace(self, face): + if face in ["helvetica", "arial", "courier", "times roman"]: + f, s, b = self.config["font"] + self._reconfig("font", (face, s, b)) + else: + raise GraphicsError(BAD_OPTION) + + def setSize(self, size): + if 5 <= size <= 36: + f, s, b = self.config["font"] + self._reconfig("font", (f, size, b)) + else: + raise GraphicsError(BAD_OPTION) + + def setStyle(self, style): + if style in ["bold", "normal", "italic", "bold italic"]: + f, s, b = self.config["font"] + self._reconfig("font", (f, s, style)) + else: + raise GraphicsError(BAD_OPTION) + + def setTextColor(self, color): + self.setFill(color) + + +class Entry(GraphicsObject): + def __init__(self, p, width): + GraphicsObject.__init__(self, []) + self.anchor = p.clone() + # print self.anchor + self.width = width + self.text = tk.StringVar(_root) + self.text.set("") + self.fill = "gray" + self.color = "black" + self.font = DEFAULT_CONFIG["font"] + self.entry = None + + def _draw(self, canvas, options): + p = self.anchor + x, y = canvas.toScreen(p.x, p.y) + frm = tk.Frame(canvas.master) + self.entry = tk.Entry( + frm, + width=self.width, + textvariable=self.text, + bg=self.fill, + fg=self.color, + font=self.font, + ) + self.entry.pack() + # self.setFill(self.fill) + return canvas.create_window(x, y, window=frm) + + def getText(self): + return self.text.get() + + def _move(self, dx, dy): + self.anchor.move(dx, dy) + + def getAnchor(self): + return self.anchor.clone() + + def clone(self): + other = Entry(self.anchor, self.width) + other.config = self.config.copy() + other.text = tk.StringVar() + other.text.set(self.text.get()) + other.fill = self.fill + return other + + def setText(self, t): + self.text.set(t) + + def setFill(self, color): + self.fill = color + if self.entry: + self.entry.config(bg=color) + + def _setFontComponent(self, which, value): + font = list(self.font) + font[which] = value + self.font = tuple(font) + if self.entry: + self.entry.config(font=self.font) + + def setFace(self, face): + if face in ["helvetica", "arial", "courier", "times roman"]: + self._setFontComponent(0, face) + else: + raise GraphicsError(BAD_OPTION) + + def setSize(self, size): + if 5 <= size <= 36: + self._setFontComponent(1, size) + else: + raise GraphicsError(BAD_OPTION) + + def setStyle(self, style): + if style in ["bold", "normal", "italic", "bold italic"]: + self._setFontComponent(2, style) + else: + raise GraphicsError(BAD_OPTION) + + def setTextColor(self, color): + self.color = color + if self.entry: + self.entry.config(fg=color) + + +class Image(GraphicsObject): + + idCount = 0 + imageCache = {} # tk photoimages go here to avoid GC while drawn + + def __init__(self, p, *pixmap): + GraphicsObject.__init__(self, []) + self.anchor = p.clone() + self.imageId = Image.idCount + Image.idCount = Image.idCount + 1 + if len(pixmap) == 1: # file name provided + self.img = tk.PhotoImage(file=pixmap[0], master=_root) + else: # width and height provided + width, height = pixmap + self.img = tk.PhotoImage(master=_root, width=width, height=height) + + def _draw(self, canvas, options): + p = self.anchor + x, y = canvas.toScreen(p.x, p.y) + self.imageCache[self.imageId] = self.img # save a reference + return canvas.create_image(x, y, image=self.img) + + def _move(self, dx, dy): + 
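+        # update only the stored anchor point; the on-screen canvas item is repositioned by the shared GraphicsObject.move() logic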
self.anchor.move(dx, dy)
+
+    def undraw(self):
+        try:
+            del self.imageCache[self.imageId]  # allow gc of tk photoimage
+        except KeyError:
+            pass
+        GraphicsObject.undraw(self)
+
+    def getAnchor(self):
+        return self.anchor.clone()
+
+    def clone(self):
+        other = Image(Point(0, 0), 0, 0)
+        other.img = self.img.copy()
+        other.anchor = self.anchor.clone()
+        other.config = self.config.copy()
+        return other
+
+    def getWidth(self):
+        """Returns the width of the image in pixels"""
+        return self.img.width()
+
+    def getHeight(self):
+        """Returns the height of the image in pixels"""
+        return self.img.height()
+
+    def getPixel(self, x, y):
+        """Returns a list [r,g,b] with the RGB color values for pixel (x,y)
+        r,g,b are in range(256)
+
+        """
+
+        value = self.img.get(x, y)
+        if type(value) == type(0):
+            return [value, value, value]
+        else:
+            return list(map(int, value.split()))
+
+    def setPixel(self, x, y, color):
+        """Sets pixel (x,y) to the given color
+
+        """
+        self.img.put("{" + color + "}", (x, y))
+
+    def save(self, filename):
+        """Saves the pixmap image to filename.
+        The format for the saved image is determined from the filename extension.
+
+        """
+
+        path, name = os.path.split(filename)
+        ext = name.split(".")[-1]
+        self.img.write(filename, format=ext)
+
+
+def color_rgb(r, g, b):
+    """r,g,b are intensities of red, green, and blue in range(256)
+    Returns color specifier string for the resulting color"""
+    return "#%02x%02x%02x" % (r, g, b)
+
+
+def test():
+    win = GraphWin()
+    win.setCoords(0, 0, 10, 10)
+    t = Text(Point(5, 5), "Centered Text")
+    t.draw(win)
+    p = Polygon(Point(1, 1), Point(5, 3), Point(2, 7))
+    p.draw(win)
+    e = Entry(Point(5, 6), 10)
+    e.draw(win)
+    win.getMouse()
+    p.setFill("red")
+    p.setOutline("blue")
+    p.setWidth(2)
+    s = ""
+    for pt in p.getPoints():
+        s = s + "(%0.1f,%0.1f) " % (pt.getX(), pt.getY())
+    t.setText(e.getText())
+    e.setFill("green")
+    e.setText("Spam!")
+    e.move(2, 0)
+    win.getMouse()
+    p.move(2, 3)
+    s = ""
+    for pt in p.getPoints():
+        s = s + "(%0.1f,%0.1f) " % (pt.getX(), pt.getY())
+    t.setText(s)
+    win.getMouse()
+    p.undraw()
+    e.undraw()
+    t.setStyle("bold")
+    win.getMouse()
+    t.setStyle("normal")
+    win.getMouse()
+    t.setStyle("italic")
+    win.getMouse()
+    t.setStyle("bold italic")
+    win.getMouse()
+    t.setSize(14)
+    win.getMouse()
+    t.setFace("arial")
+    t.setSize(20)
+    win.getMouse()
+    win.close()
+
+
+if __name__ == "__main__":
+    test()
diff --git a/imperfect_envs/driving/visualizer.py b/imperfect_envs/driving/visualizer.py
new file mode 100644
index 0000000..d7c3b1f
--- /dev/null
+++ b/imperfect_envs/driving/visualizer.py
@@ -0,0 +1,82 @@
+from driving.graphics import *
+from driving.entities import RectangleEntity, CircleEntity, TextEntity
+
+
+
+class Visualizer:
+    def __init__(self, width: float, height: float, ppm: int):
+        # width (meters)
+        # height (meters)
+        # ppm is the number of pixels per meter
+
+        self.ppm = ppm
+        self.display_width, self.display_height = int(width * ppm), int(height * ppm)
+        self.window_created = False
+        self.visualized_imgs = []
+        self.win = None
+
+    def create_window(self, bg_color: str = "gray80"):
+        if not self.window_created or self.win.isClosed():
+            self.win = GraphWin("CARLO", self.display_width, self.display_height)
+            self.win.setBackground(bg_color)
+            self.window_created = True
+            self.visualized_imgs = []
+
+    def update_agents(self, agents: list, correct_pos: list=None, next_pos: list=None):
+        new_visualized_imgs = []
+
+        # Remove the movable agents from the window
+        for imgItem in self.visualized_imgs:
+            if
imgItem["movable"]: + imgItem["graphics"].undraw() + else: + new_visualized_imgs.append({"movable": False, "graphics": imgItem["graphics"]}) + + # Add the updated movable agents (and the unmovable ones if they were not rendered before) + for agent in agents: + if isinstance(agent, TextEntity): + img = Text( + Point( + self.ppm * agent.center.x, + self.display_height - self.ppm * agent.center.y, + ), + agent.text, + ) + img.setSize(15) + img.draw(self.win) + # TODO(allanz): Hack: set movable=True so text is erased each iteration. + new_visualized_imgs.append({"movable": True, "graphics": img}) + elif agent.movable or not self.visualized_imgs: + if isinstance(agent, RectangleEntity): + C = [self.ppm * c for c in agent.corners] + img = Polygon([Point(c.x, self.display_height - c.y) for c in C]) + + # arrow + if agent.movable and (correct_pos and next_pos): + start = Point(self.ppm * agent.center.x, self.display_height - self.ppm * agent.center.y) + end = Point(self.ppm * correct_pos[0], self.display_height - self.ppm * correct_pos[1]) + #print("ACTION??: ", correct_pos) + line = Line(start, end) + line.setArrow("last") + line.draw(self.win) + new_visualized_imgs.append({"movable": agent.movable, "graphics": line}) + elif isinstance(agent, CircleEntity): + img = Circle( + Point( + self.ppm * agent.center.x, + self.display_height - self.ppm * agent.center.y, + ), + self.ppm * agent.radius, + ) + else: + raise NotImplementedError + img.setFill(agent.color) + img.draw(self.win) + new_visualized_imgs.append({"movable": agent.movable, "graphics": img}) + + self.visualized_imgs = new_visualized_imgs + + def close(self): + self.window_created = False + self.win.close() + self.visualized_imgs = [] diff --git a/imperfect_envs/driving/world.py b/imperfect_envs/driving/world.py new file mode 100644 index 0000000..2757471 --- /dev/null +++ b/imperfect_envs/driving/world.py @@ -0,0 +1,83 @@ +from typing import Union +import numpy as np +from driving.agents import Car, Pedestrian, Building +from driving.entities import Entity +from driving.visualizer import Visualizer + + +class World: + def __init__(self, dt: float, width: float, height: float, ppm: float = 8): + self.dynamic_agents = [] + self.static_agents = [] + self.t = 0 # simulation time + self.dt = dt # simulation time step + self.visualizer = Visualizer(width, height, ppm=ppm) + + def add(self, entity: Entity): + if entity.movable: + self.dynamic_agents.append(entity) + else: + self.static_agents.append(entity) + + def tick(self): + for agent in self.dynamic_agents: + agent.tick(self.dt) + self.t += self.dt + + def render(self, correct_pos=None, next_pos=None): + self.visualizer.create_window(bg_color="gray") + self.visualizer.update_agents(self.agents, correct_pos, next_pos) + + @property + def state(self): + return np.concatenate([agent.state for agent in self.dynamic_agents]) + + @state.setter + def state(self, x): + num_agents = len(self.dynamic_agents) + assert x.shape[0] % num_agents == 0 + agent_state_length = int(x.shape[0] / num_agents) + offset = 0 + for agent in self.dynamic_agents: + agent_new_state = x[offset : offset + agent_state_length] + agent.state = agent_new_state + offset += agent_state_length + + @property + def agents(self): + return self.static_agents + self.dynamic_agents + + def collision_exists(self, agent=None): + if agent is None: + for i in range(len(self.dynamic_agents)): + for j in range(i + 1, len(self.dynamic_agents)): + if self.dynamic_agents[i].collidable and self.dynamic_agents[j].collidable: + if 
self.dynamic_agents[i].collidesWith(self.dynamic_agents[j]): + return True + for j in range(len(self.static_agents)): + if self.dynamic_agents[i].collidable and self.static_agents[j].collidable: + if self.dynamic_agents[i].collidesWith(self.static_agents[j]): + return True + return False + + if not agent.collidable: + return False + + for i in range(len(self.agents)): + if ( + self.agents[i] is not agent + and self.agents[i].collidable + and agent.collidesWith(self.agents[i]) + ): + return True + return False + + def close(self): + self.reset() + self.static_agents = [] + self.visualizer.close() + + def reset(self): + self.dynamic_agents = [] + self.static_agents = [] + self.t = 0 diff --git a/imperfect_envs/imperfect.egg-info/PKG-INFO b/imperfect_envs/imperfect.egg-info/PKG-INFO new file mode 100644 index 0000000..75eeb17 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 2.1 +Name: imperfect +Version: 0.0.1 +Summary: UNKNOWN +Home-page: UNKNOWN +License: UNKNOWN +Platform: UNKNOWN + +UNKNOWN + diff --git a/imperfect_envs/imperfect.egg-info/SOURCES.txt b/imperfect_envs/imperfect.egg-info/SOURCES.txt new file mode 100644 index 0000000..1a9d17b --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/SOURCES.txt @@ -0,0 +1,7 @@ +README.md +setup.py +imperfect.egg-info/PKG-INFO +imperfect.egg-info/SOURCES.txt +imperfect.egg-info/dependency_links.txt +imperfect.egg-info/requires.txt +imperfect.egg-info/top_level.txt \ No newline at end of file diff --git a/imperfect_envs/imperfect.egg-info/dependency_links.txt b/imperfect_envs/imperfect.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/imperfect_envs/imperfect.egg-info/requires.txt b/imperfect_envs/imperfect.egg-info/requires.txt new file mode 100644 index 0000000..01ef558 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/requires.txt @@ -0,0 +1,3 @@ +gym +numpy +reacher diff --git a/imperfect_envs/imperfect.egg-info/top_level.txt b/imperfect_envs/imperfect.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/imperfect_envs/reacher/__init__.py b/imperfect_envs/reacher/__init__.py new file mode 100644 index 0000000..4f42dff --- /dev/null +++ b/imperfect_envs/reacher/__init__.py @@ -0,0 +1,35 @@ +from gym.envs.registration import register + +register( + id='reacher_custom-v0', + entry_point='reacher.envs:ReacherCustomEnv', + max_episode_steps=50, + reward_threshold=-3.75, +) + +register( + id='reacher_custom-action1-v0', + entry_point='reacher.envs:ReacherCustomAction1Env', + max_episode_steps=50, + reward_threshold=-3.75, +) + +register( + id='reacher_custom-action2-v0', + entry_point='reacher.envs:ReacherCustomAction2Env', + max_episode_steps=50, + reward_threshold=-3.75, +) +register( + id='reacher_custom-raction1-v0', + entry_point='reacher.envs:ReacherCustomRAction1Env', + max_episode_steps=50, + reward_threshold=-3.75, +) + +register( + id='reacher_custom-raction2-v0', + entry_point='reacher.envs:ReacherCustomRAction2Env', + max_episode_steps=50, + reward_threshold=-3.75, +) diff --git a/imperfect_envs/reacher/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/reacher/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..f82214d Binary files /dev/null and b/imperfect_envs/reacher/__pycache__/__init__.cpython-310.pyc differ diff --git 
a/imperfect_envs/reacher/envs/__init__.py b/imperfect_envs/reacher/envs/__init__.py new file mode 100644 index 0000000..8ab022f --- /dev/null +++ b/imperfect_envs/reacher/envs/__init__.py @@ -0,0 +1,2 @@ +from reacher.envs.reacher import ReacherCustomEnv +from reacher.envs.reacher import ReacherCustomAction1Env, ReacherCustomAction2Env, ReacherCustomRAction1Env, ReacherCustomRAction2Env diff --git a/imperfect_envs/reacher/envs/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/reacher/envs/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..f95ddcb Binary files /dev/null and b/imperfect_envs/reacher/envs/__pycache__/__init__.cpython-310.pyc differ diff --git a/imperfect_envs/reacher/envs/__pycache__/reacher.cpython-310.pyc b/imperfect_envs/reacher/envs/__pycache__/reacher.cpython-310.pyc new file mode 100644 index 0000000..2a15908 Binary files /dev/null and b/imperfect_envs/reacher/envs/__pycache__/reacher.cpython-310.pyc differ diff --git a/imperfect_envs/reacher/envs/assets/reacher.xml b/imperfect_envs/reacher/envs/assets/reacher.xml new file mode 100644 index 0000000..be97198 --- /dev/null +++ b/imperfect_envs/reacher/envs/assets/reacher.xml @@ -0,0 +1,39 @@ + + + + + + + diff --git a/imperfect_envs/reacher/envs/assets/reacher_action1.xml b/imperfect_envs/reacher/envs/assets/reacher_action1.xml new file mode 100644 index 0000000..a86f5b4 --- /dev/null +++ b/imperfect_envs/reacher/envs/assets/reacher_action1.xml @@ -0,0 +1,39 @@ + + + + + + + diff --git a/imperfect_envs/reacher/envs/assets/reacher_action2.xml b/imperfect_envs/reacher/envs/assets/reacher_action2.xml new file mode 100644 index 0000000..a685385 --- /dev/null +++ b/imperfect_envs/reacher/envs/assets/reacher_action2.xml @@ -0,0 +1,39 @@ + + + + + + + diff --git a/imperfect_envs/reacher/envs/reacher.py b/imperfect_envs/reacher/envs/reacher.py new file mode 100644 index 0000000..9ba0567 --- /dev/null +++ b/imperfect_envs/reacher/envs/reacher.py @@ -0,0 +1,97 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env +import gym +from gym import spaces +import os +import random + +class ReacherCustomEnv(mujoco_env.MuJocoPyEnv, utils.EzPickle): + metadata = {'render_modes': ['human', 'rgb_array', 'depth_array'], 'render_fps': 50} + def __init__(self, config_file='reacher.xml', **kwargs): + dir_path = os.path.dirname(os.path.realpath(__file__)) + utils.EzPickle.__init__(self) + # print("fullpath is here ", self.fullpath) + # self._initialize_simulation() + self.observation_space = spaces.Box(low = -np.inf, high = np.inf, shape=(11,), dtype=np.float32) + # self.action_space = spaces.Box(low = -np.inf, high = np.inf, shape=(2,), dtype=np.float32) + mujoco_env.MuJocoPyEnv.__init__(self, ('%s/assets/'+config_file) % dir_path, 2, self.observation_space, **kwargs) + + def step(self, a): + vec = self.get_body_com("fingertip")-self.get_body_com("target") + reward_dist = - np.linalg.norm(vec) + reward_ctrl = - np.square(a).sum() + reward = reward_dist + reward_ctrl + self.do_simulation(a, self.frame_skip) + ob = self._get_obs() + done = False + reward_for_eval = reward_dist * 10# - np.sqrt(self.sim.data.qvel.flat[0]**2+self.sim.data.qvel.flat[1]**2) / 20. 
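+        # gym >= 0.26 step contract: (obs, reward, terminated, truncated, info);
+        # truncated stays False here since episode length is capped by the
+        # max_episode_steps=50 set at registration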
+
+        return ob, reward, done, False, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, reward_eval=reward_for_eval)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 0
+
+    def reset_with_obs(self, obs):
+        self.sim.reset()
+        qpos = np.array([0., 0., 0., 0.])
+        self.goal = obs[4:6]
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        qvel[-2:] = 0
+        qvel[0:2] = obs[6:8]
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def reset_model(self):
+        #self.close_goal = False
+        #qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+        #while True:
+        #    self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
+        #    if np.linalg.norm(self.goal) < 0.2:
+        #        break
+        qpos = np.array([0., 0., 0., 0.])
+        self.goal = np.concatenate([self.np_random.uniform(low=-.1, high=.1, size=1),
+                                    self.np_random.uniform(low=-.2, high=-.1, size=1) if self.np_random.uniform(low=0, high=1., size=1)[0] > 0.5 else self.np_random.uniform(low=.1, high=.2, size=1)])
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        theta = self.sim.data.qpos.flat[:2]
+        return np.concatenate([
+            np.cos(theta),
+            np.sin(theta),
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
+            self.get_body_com("fingertip") - self.get_body_com("target")
+        ])
+
+class ReacherCustomAction1Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomAction1Env, self).__init__('reacher_action1.xml', **kwargs)
+
+class ReacherCustomRAction1Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomRAction1Env, self).__init__('reacher_action1.xml', **kwargs)
+        self.action_space = gym.spaces.Box(low=np.array([-1., -1.]).astype('float32'), high=np.array([0., 0.]).astype('float32'))
+
+    def step(self, a):
+        a = np.clip(a, -1., 0.)
+        return super(ReacherCustomRAction1Env, self).step(a)
+
+class ReacherCustomAction2Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomAction2Env, self).__init__('reacher_action2.xml', **kwargs)
+
+class ReacherCustomRAction2Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomRAction2Env, self).__init__('reacher_action2.xml', **kwargs)
+        self.action_space = gym.spaces.Box(low=np.array([0., 0.]).astype('float32'), high=np.array([1., 1.]).astype('float32'))
+
+    def step(self, a):
+        a = np.clip(a, 0., 1.)
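+        # out-of-range commands are clamped to the env's [0, 1] action box before the shared dynamics run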
+ return super(ReacherCustomRAction2Env, self).step(a) + diff --git a/imperfect_envs/setup.py b/imperfect_envs/setup.py new file mode 100644 index 0000000..b25667d --- /dev/null +++ b/imperfect_envs/setup.py @@ -0,0 +1,6 @@ +from setuptools import setup + +setup(name='imperfect', + version='0.0.1', + install_requires=['gym', 'numpy', 'reacher'] # And any other dependencies foo needs +) diff --git a/simulate_data.py b/simulate_data.py new file mode 100644 index 0000000..5ae9390 --- /dev/null +++ b/simulate_data.py @@ -0,0 +1,122 @@ +import gym +import gym.wrappers +import reacher +import driving +import time +from gym import make +import numpy as np +import argparse +import pickle +parser = argparse.ArgumentParser(description='Test the model') +parser.add_argument('--num-episodes', type=int, default=10) +parser.add_argument('--seed', type=int, default=1001) +parser.add_argument('--use-sleep', action='store_true') +parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0') +parser.add_argument('--data_path', type=str, default='new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl') +parser.add_argument('--render', action='store_true') +parser.add_argument('--goalx', type=int, default=15) +parser.add_argument('--goaly', type=int, default=38) + +paths = {'optimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl', + 'suboptimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl', + 'optimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', + 'suboptimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', + 'optimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl', + 'suboptimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl', + 'optimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl', + 'suboptimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl', + 'optimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl', + 'suboptimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl', + 'optimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl', + 'suboptimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl'} +args = parser.parse_args() +with open(args.data_path, 'rb') as f: + episodes = pickle.load(f) +print(len(episodes)) +print(episodes[1]['reward']) +args = parser.parse_args() + + +env1 = gym.make(args.env) +num_inputs = env1.observation_space.shape[0] +num_actions = env1.action_space.shape[0] +print(num_inputs, num_actions) +for i in range(args.num_episodes): + accumulator = 0 + accumulator2 = 0 + initial_state = episodes[i]['state'][0] + env1.reset(goal = initial_state[7:9]) + print('goal location', initial_state[7:9]) + env1.reset_with_obs(initial_state) + # print("initial state ", initial_state) + 
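+    # the restored observation must match the logged one exactly before replaying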
# print("initial state as per the model ", env1.get_obs()) + assert np.allclose(env1.get_obs(), initial_state) + if args.render: + env1.render() + for step in range(len(episodes[i]['reward'])): + action = episodes[i]['action'][step] + print(action.dtype) + next_state, reward, done, _, info= env1.step(action) + print("next state as per the model ", next_state) + print("next state as per the data ", episodes[i]['state'][step+1]) + try: + assert np.allclose(next_state, episodes[i]['state'][step+1]) + except: + print("the two states are not the same ", next_state, episodes[i]['state'][step+1]) + accumulator += episodes[i]['reward'][step] + accumulator2 += reward + if args.render: + env1.render() + if args.use_sleep: + time.sleep(0.05) + time.sleep(0.1) + print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2)) +env1.close() + +# with open(paths['optimalfast1538'], 'rb') as f: +# episodes = pickle.load(f) +# print(len(episodes)) +# print(episodes[1]['reward']) +# environment = args.env +# env1 = gym.make(environment) +# # env1.set_goal(args.goalx, args.goaly) +# # env1.reset(goal = [args.goalx, args.goaly]) +# # env1.set_goal(args.goalx, args.goaly) +# num_inputs = env1.observation_space.shape[0] +# num_actions = env1.action_space.shape[0] +# print(num_inputs, num_actions) +# for i in range(args.num_episodes): +# accumulator = 0 +# accumulator2 = 0 +# initial_state = episodes[i]['state'][0] +# env1.reset(goal = initial_state[7:9]) +# print('goal location', initial_state[7:9]) +# # env1.set_goal(initial_state[7], initial_state[8]) +# env1.reset_with_obs(initial_state) +# if args.render: +# env1.render() +# for step in range(len(episodes[i]['reward'])): +# # accumulator += episodes[i]['reward'][step] +# action = episodes[i]['action'][step] +# # print(episodes[i]['reward'][step]) +# next_state, reward, done, _, info= env1.step(action) +# # print("next state as per the model ", next_state) +# # print("next state as per the data ", episodes[i]['state'][step+1]) +# # env1.reset_with_obs(episodes[i]['state'][step+1]) +# accumulator += episodes[i]['reward'][step] +# accumulator2 += reward +# if args.render: +# env1.render() +# if args.use_sleep: +# time.sleep(0.05) +# time.sleep(0.1) +# print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2)) +# env1.close() + + +# import pickle as pkl +# with open('new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', 'rb') as f: +# data = pkl.load(f) +# print(len(data[0]['state'])) +# print((data[0]['state'][0])) +# print(len(data[0]['state'][0])) diff --git a/test.py b/test.py index e23cbdf..83487bf 100644 --- a/test.py +++ b/test.py @@ -5,10 +5,12 @@ import torch import numpy as np - +import time import gym -import roboschool - +import pickle +# import roboschool +import driving +import argparse from PPO import PPO @@ -17,42 +19,60 @@ def test(): print("============================================================================================") ################## hyperparameters ################## - - # env_name = "CartPole-v1" + parser = argparse.ArgumentParser(description='Test the model') + parser.add_argument('--seed', type=int, default=1001) + parser.add_argument('--goalx', type=int, default=15) + parser.add_argument('--goaly', type=int, default=38) + parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0') + parser.add_argument('--render', action='store_true') + parser.add_argument('--use-sleep', action='store_true') + 
parser.add_argument('--optimal', action='store_true') + parser.add_argument('--suboptimal', action='store_true') + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_num_samples', type=int, default=1000) + parser.add_argument('--threshold', type=float, default=0) + parser.add_argument('--dontsave', action='store_true') + args = parser.parse_args() + # args.env = "CartPole-v1" # has_continuous_action_space = False # max_ep_len = 400 # action_std = None - # env_name = "LunarLander-v2" + # args.env = "LunarLander-v2" # has_continuous_action_space = False # max_ep_len = 300 # action_std = None - # env_name = "BipedalWalker-v2" + # args.env = "BipedalWalker-v2" # has_continuous_action_space = True # max_ep_len = 1500 # max timesteps in one episode # action_std = 0.1 # set same std for action distribution which was used while saving - env_name = "RoboschoolWalker2d-v1" has_continuous_action_space = True max_ep_len = 1000 # max timesteps in one episode action_std = 0.1 # set same std for action distribution which was used while saving - render = True # render environment on screen + # delay = True # add delay b/w frames to make video like real time + # render = True # render environment on screen frame_delay = 0 # if required; add delay b/w frames - total_test_episodes = 10 # total num of testing episodes + # total_test_episodes = 10 # total num of testing episodes K_epochs = 80 # update policy for K epochs eps_clip = 0.2 # clip parameter for PPO gamma = 0.99 # discount factor lr_actor = 0.0003 # learning rate for actor - lr_critic = 0.001 # learning rate for critic - + lr_critic = 0.001 + # learning rate for critic + run_best_model = False + if args.optimal: + run_best_model = True + # load and run the best saved model ##################################################### - env = gym.make(env_name) + env = gym.make(args.env) + env.set_goal(args.goalx, args.goaly) # state space dimension state_dim = env.observation_space.shape[0] @@ -71,8 +91,24 @@ def test(): random_seed = 0 #### set this to load a particular checkpoint trained on random seed run_num_pretrained = 0 #### set this to load a particular checkpoint num - directory = "PPO_preTrained" + '/' + env_name + '/' - checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, random_seed, run_num_pretrained) + directory = "PPO_preTrained" + '/' + args.env + '/' + data_directory = "new_data" + if not os.path.exists(directory): + print("No directory found") + exit() + if not os.path.exists(data_directory): + os.makedirs(data_directory) + data_directory = data_directory + '/' + args.env + '/' + if not os.path.exists(data_directory): + os.makedirs(data_directory) + + if not run_best_model: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + else: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}best.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + + optimal_data_path = data_directory + "optimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + suboptimal_data_path = data_directory + "suboptimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) print("loading network from : " + checkpoint_path) ppo_agent.load(checkpoint_path) @@ -80,40 +116,104 @@ def test(): print("--------------------------------------------------------------------------------------------") test_running_reward = 0 - - for ep 
in range(1, total_test_episodes+1):
+    dataload = []
+    num_optimal = 1
+    num_suboptimal = 1
+    for ep in range(1, args.num_episodes+1):
         ep_reward = 0
         state = env.reset()
-
+        state_dict = {'state' : [], 'action': [], 'reward': [], 'optimal': []}
+        state_dict['state'].append(state)

         for t in range(1, max_ep_len+1):
             action = ppo_agent.select_action(state)
-            state, reward, done, _ = env.step(action)
+            # print(action.dtype)
+            state, reward, done, _, _ = env.step(action)
             ep_reward += reward
-
-            if render:
+            state_dict['state'].append(state)
+            state_dict['action'].append(action)
+            state_dict['reward'].append(reward)
+            if args.render:
                 env.render()
-                time.sleep(frame_delay)
-
+                # time.sleep(frame_delay)
+                if args.use_sleep:
+                    time.sleep(0.05)
             if done:
                 break

         # clear buffer
         ppo_agent.buffer.clear()
-
+        if num_optimal > args.max_num_samples and args.optimal and not args.suboptimal:
+            break
+        if num_suboptimal > args.max_num_samples and args.suboptimal and not args.optimal:
+            break
+        if num_optimal > args.max_num_samples and num_suboptimal > args.max_num_samples and args.optimal and args.suboptimal:
+            break
+        if ep_reward > args.threshold and args.optimal:
+            state_dict['optimal'] = [True] * len(state_dict['action'])
+            num_optimal += 1
+            dataload.append(state_dict)
+        if ep_reward <= args.threshold and args.suboptimal:
+            state_dict['optimal'] = [False] * len(state_dict['action'])
+            num_suboptimal += 1
+            dataload.append(state_dict)
         test_running_reward += ep_reward
         print('Episode: {} \t\t Reward: {}'.format(ep, round(ep_reward, 2)))
+        ep_reward = 0

     env.close()

     print("============================================================================================")

-    avg_test_reward = test_running_reward / total_test_episodes
+    avg_test_reward = test_running_reward / args.num_episodes
     avg_test_reward = round(avg_test_reward, 2)
     print("average test reward : " + str(avg_test_reward))
-
+    if not args.dontsave and args.optimal:
+        with open(optimal_data_path, 'wb') as file:
+            pickle.dump(dataload, file)
+        print("optimal data saved at : " + optimal_data_path + " with " + str(num_optimal) + " samples")
+    if not args.dontsave and args.suboptimal:
+        with open(suboptimal_data_path, 'wb') as file:
+            pickle.dump(dataload, file)
+        print("suboptimal data saved at : " + suboptimal_data_path + " with " + str(num_suboptimal) + " samples")
     print("============================================================================================")
+    print("length of dataload ", len(dataload))
+    print("fields of dataload ", dataload[0].keys())
+    for i in range(args.num_episodes):
+        accumulator = 0
+        accumulator2 = 0
+        initial_state = dataload[i]['state'][0]
+        env.reset(goal = initial_state[7:9])
+        print('goal location', initial_state[7:9])
+        env.reset_with_obs(initial_state)
+        # print("initial state ", initial_state)
+        # print("initial state as per the model ", env.get_obs())
+        assert np.allclose(env.get_obs(), initial_state)
+        if args.render:
+            env.render()
+        for step in range(len(dataload[i]['reward'])):
+            action = dataload[i]['action'][step]
+            # print(action.dtype)
+            next_state, reward, done, _, info = env.step(action)
+            # print("next state as per the model ", next_state)
+            # print("next state as per the data ", dataload[i]['state'][step+1])
+            try:
+                assert np.allclose(next_state, dataload[i]['state'][step+1])
+            except AssertionError:
+                print("the two states are not the same ", next_state, dataload[i]['state'][step+1])
+            accumulator += dataload[i]['reward'][step]
+            accumulator2 += reward
+            if args.render:
+                env.render()
+            if args.use_sleep:
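+                # short pause so rendered replay runs at roughly real time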
+                time.sleep(0.05)
+        time.sleep(0.1)
+        print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2))
+    env.close()
+
+
+
 if __name__ == '__main__':
diff --git a/train.py b/train.py
index 4f06358..b36f36f 100644
--- a/train.py
+++ b/train.py
@@ -7,16 +7,26 @@
 import numpy as np

 import gym
-import roboschool
+import driving
+# import roboschool
+import argparse

 from PPO import PPO

+
 ################################### Training ###################################
 def train():
     print("============================================================================================")

     ####### initialize environment hyperparameters ######
-    env_name = "RoboschoolWalker2d-v1"
+    parser = argparse.ArgumentParser(description='Train the model')
+    parser.add_argument('--seed', type=int, default=1001)
+    parser.add_argument('--goalx', type=int, default=15)
+    parser.add_argument('--goaly', type=int, default=38)
+    parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0')
+    parser.add_argument('--render', action='store_true')
+    parser.add_argument('--use-sleep', action='store_true')
+    args = parser.parse_args()

     has_continuous_action_space = True  # continuous action space; else discrete

@@ -48,12 +58,19 @@
     random_seed = 0         # set random seed if required (0 = no random seed)
     #####################################################

-    print("training environment name : " + env_name)
-
-    env = gym.make(env_name)
+    print("training environment name : " + args.env)
+    env = gym.make(args.env)
+    env.set_goal(args.goalx, args.goaly)
+    print(env.goal)
+    for i in range(100):
+        env.reset()
+        env.render()
+        time.sleep(0.01)
+    env.close()

     # state space dimension
     state_dim = env.observation_space.shape[0]
+    print(state_dim)

     # action space dimension
     if has_continuous_action_space:
@@ -68,7 +85,7 @@
     if not os.path.exists(log_dir):
         os.makedirs(log_dir)

-    log_dir = log_dir + '/' + env_name + '/'
+    log_dir = log_dir + '/' + args.env + '/'
     if not os.path.exists(log_dir):
         os.makedirs(log_dir)

@@ -78,26 +95,28 @@
     run_num = len(current_num_files)

     #### create new log file for each run
-    log_f_name = log_dir + '/PPO_' + env_name + "_log_" + str(run_num) + ".csv"
+    log_f_name = log_dir + '/PPO_' + args.env + "_log_" + str(run_num) + ".csv"

-    print("current logging run number for " + env_name + " : ", run_num)
+    print("current logging run number for " + args.env + " : ", run_num)
     print("logging at : " + log_f_name)
     #####################################################

     ################### checkpointing ###################
-    run_num_pretrained = 0      #### change this to prevent overwriting weights in same env_name folder
+    run_num_pretrained = 0      #### change this to prevent overwriting weights in same args.env folder

     directory = "PPO_preTrained"
     if not os.path.exists(directory):
         os.makedirs(directory)

-    directory = directory + '/' + env_name + '/'
+    directory = directory + '/' + args.env + '/'
     if not os.path.exists(directory):
         os.makedirs(directory)

-    checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, random_seed, run_num_pretrained)
+    checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained)
+    best_model_path = directory + "PPO_{}_{}_{}_{}_{}best.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained)
     print("save checkpoint path : " + checkpoint_path)
+    print("save best model path : " + best_model_path)
     #####################################################

@@
-163,7 +182,7 @@ def train(): time_step = 0 i_episode = 0 - + max_reward = -np.inf # training loop while time_step <= max_training_timesteps: @@ -173,8 +192,10 @@ def train(): for t in range(1, max_ep_len+1): # select action with policy + if args.render: + env.render() action = ppo_agent.select_action(state) - state, reward, done, _ = env.step(action) + state, reward, done, _ , _= env.step(action) # saving reward and is_terminals ppo_agent.buffer.rewards.append(reward) @@ -220,6 +241,10 @@ def train(): if time_step % save_model_freq == 0: print("--------------------------------------------------------------------------------------------") print("saving model at : " + checkpoint_path) + if print_avg_reward > max_reward: + max_reward = print_avg_reward + ppo_agent.save(best_model_path) + print("best model saved with reward as ", max_reward) ppo_agent.save(checkpoint_path) print("model saved") print("Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time) @@ -228,6 +253,8 @@ def train(): # break; if the episode is over if done: break + if args.use_sleep: + time.sleep(0.01) print_running_reward += current_ep_reward print_running_episodes += 1 @@ -243,6 +270,7 @@ def train(): # print total training time print("============================================================================================") end_time = datetime.now().replace(microsecond=0) + print("model with best average reward ", max_reward) print("Started training at (GMT) : ", start_time) print("Finished training at (GMT) : ", end_time) print("Total training time : ", end_time - start_time) diff --git a/train_load.sh b/train_load.sh new file mode 100755 index 0000000..02af589 --- /dev/null +++ b/train_load.sh @@ -0,0 +1,18 @@ +# python3 train.py --goalx 15 --goaly 38 --env ContinuousSlowRandom-v0 +# python3 train.py --goalx 10 --goaly 38 --env ContinuousSlowRandom-v0 +# python3 train.py --goalx 20 --goaly 38 --env ContinuousSlowRandom-v0 +# # python3 train.py --goalx 15 --goaly 38 --env ContinuousFastRandom-v0 +# # python3 train.py --goalx 10 --goaly 38 --env ContinuousFastRandom-v0 +# # python3 train.py --goalx 20 --goaly 38 --env ContinuousFastRandom-v0 +# python3 test.py --goalx 15 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal --threshold -1000 +# python3 test.py --goalx 10 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal --threshold -1000 +# python3 test.py --goalx 20 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal --threshold -1000 +# python3 test.py --goalx 15 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal +# python3 test.py --goalx 10 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal +# python3 test.py --goalx 20 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal +python3 test.py --goalx 15 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal --threshold -1000 +# python3 test.py --goalx 10 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal --threshold -1000 +python3 test.py --goalx 20 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal --threshold -1000 +# python3 test.py --goalx 15 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal +# python3 test.py --goalx 10 
--goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal +# python3 test.py --goalx 20 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal
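
For reference, a minimal sketch of loading and replaying one of the datasets produced by the test.py changes above. It assumes the list-of-dicts layout test.py pickles (each episode dict holding 'state', 'action', 'reward', 'optimal') and the driving envs' custom reset(goal=...) and reset_with_obs() helpers; the pickle path is illustrative.

import pickle

import gym
import numpy as np

import driving  # registers the Continuous*Random-v0 environments

# illustrative path; substitute any file written by test.py
with open('new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', 'rb') as f:
    episodes = pickle.load(f)

env = gym.make('ContinuousFastRandom-v0')
ep = episodes[0]

# goal coordinates sit at indices 7:9 of the observation in this layout
env.reset(goal=ep['state'][0][7:9])
env.reset_with_obs(ep['state'][0])

replayed_return = 0.0
for t, action in enumerate(ep['action']):
    obs, reward, done, truncated, info = env.step(action)
    # a deterministic env should retrace the logged trajectory exactly
    assert np.allclose(obs, ep['state'][t + 1])
    replayed_return += reward

print('logged return:', sum(ep['reward']), 'replayed return:', replayed_return)
env.close()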