diff --git a/__pycache__/PPO.cpython-310.pyc b/__pycache__/PPO.cpython-310.pyc
new file mode 100644
index 0000000..00b2a8a
Binary files /dev/null and b/__pycache__/PPO.cpython-310.pyc differ
diff --git a/builddata.py b/builddata.py
new file mode 100644
index 0000000..dc10b8f
--- /dev/null
+++ b/builddata.py
@@ -0,0 +1,80 @@
+
+import pickle
+import random
+import os
+
+# Define the paths to the pickle files
+paths = {'optimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl',
+         'suboptimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl',
+         'optimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl',
+         'suboptimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl',
+         'optimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl',
+         'suboptimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl',
+         'optimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl',
+         'suboptimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl',
+         'optimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl',
+         'suboptimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl',
+         'optimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl',
+         'suboptimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl'}
+
+
+def select_random_elements(input_list, percentage):
+    num_elements = int(len(input_list) * percentage / 100)
+    return random.sample(input_list, num_elements)
+
+def combine(path1, path2, path3):
+    # Load the three trajectory lists from their pickle files
+    with open(path1, 'rb') as f:
+        list1 = pickle.load(f)
+
+    with open(path2, 'rb') as f:
+        list2 = pickle.load(f)
+
+    with open(path3, 'rb') as f:
+        list3 = pickle.load(f)
+    print(len(list1), len(list2), len(list3))
+    # Draw a ~33/33/34 percent split so each source contributes roughly equally
+    selected_list1 = select_random_elements(list1, 33)
+    selected_list2 = select_random_elements(list2, 33)
+    selected_list3 = select_random_elements(list3, 34)
+
+    combined_list = selected_list1 + selected_list2 + selected_list3
+    random.shuffle(combined_list)
+    print(len(combined_list))
+    return combined_list
+
+combined_list1 = combine(paths['optimalfast1038'], paths['optimalfast1538'], paths['optimalfast2038'])
+combined_list2 = combine(paths['suboptimalfast1038'], paths['suboptimalfast1538'], paths['suboptimalfast2038'])
+combined_list3 = combine(paths['optimalslow1038'], paths['optimalslow1538'], paths['optimalslow2038'])
+combined_list4 = combine(paths['suboptimalslow1038'], paths['suboptimalslow1538'], paths['suboptimalslow2038'])
+
+
+directory1 = 'new_data/ContinuousFastRandom-v0'
+directory2 = 'new_data/ContinuousSlowRandom-v0'
+
+if not os.path.exists(directory1):
+    os.makedirs(directory1)
+if not os.path.exists(directory2):
+    os.makedirs(directory2)
+
+path1 = directory1 + 
'/combined_list_fast_optimal.pkl' +path2 = directory1 + '/combined_list_fast_suboptimal.pkl' +path3 = directory2 + '/combined_list_slow_optimal.pkl' +path4 = directory2 + '/combined_list_slow_suboptimal.pkl' + +with open(path1, 'wb') as f: + pickle.dump(combined_list1, f) +with open(path2, 'wb') as f: + pickle.dump(combined_list2, f) +with open(path3, 'wb') as f: + pickle.dump(combined_list3, f) +with open(path4, 'wb') as f: + pickle.dump(combined_list4, f) +print('done') + + + diff --git a/demo_code.py b/demo_code.py new file mode 100644 index 0000000..dca6333 --- /dev/null +++ b/demo_code.py @@ -0,0 +1,216 @@ +import os +import glob +import time +from datetime import datetime + +import torch +import numpy as np +import time +import gym +import pickle +# import roboschool +import driving +import argparse +from PPO import PPO + + +#################################### Testing ################################### +def test(): + print("============================================================================================") + + ################## hyperparameters ################## + parser = argparse.ArgumentParser(description='Test the model') + parser.add_argument('--seed', type=int, default=1001) + parser.add_argument('--goalx', type=int, default=15) + parser.add_argument('--goaly', type=int, default=38) + parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0') + parser.add_argument('--render', action='store_true') + parser.add_argument('--use-sleep', action='store_true') + parser.add_argument('--optimal', action='store_true') + parser.add_argument('--suboptimal', action='store_true') + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_num_samples', type=int, default=1000) + parser.add_argument('--threshold', type=float, default=0) + parser.add_argument('--dontsave', action='store_true') + args = parser.parse_args() + # args.env = "CartPole-v1" + # has_continuous_action_space = False + # max_ep_len = 400 + # action_std = None + + # args.env = "LunarLander-v2" + # has_continuous_action_space = False + # max_ep_len = 300 + # action_std = None + + # args.env = "BipedalWalker-v2" + # has_continuous_action_space = True + # max_ep_len = 1500 # max timesteps in one episode + # action_std = 0.1 # set same std for action distribution which was used while saving + + has_continuous_action_space = True + max_ep_len = 1000 # max timesteps in one episode + action_std = 0.1 # set same std for action distribution which was used while saving + + # delay = True # add delay b/w frames to make video like real time + # render = True # render environment on screen + frame_delay = 0 # if required; add delay b/w frames + + # total_test_episodes = 10 # total num of testing episodes + + K_epochs = 80 # update policy for K epochs + eps_clip = 0.2 # clip parameter for PPO + gamma = 0.99 # discount factor + + lr_actor = 0.0003 # learning rate for actor + lr_critic = 0.001 + # learning rate for critic + run_best_model = False + if args.optimal: + run_best_model = True + # load and run the best saved model + ##################################################### + + env = gym.make(args.env) + env.set_goal(args.goalx, args.goaly) + + # state space dimension + state_dim = env.observation_space.shape[0] + + # action space dimension + if has_continuous_action_space: + action_dim = env.action_space.shape[0] + else: + action_dim = env.action_space.n + + # initialize a PPO agent + ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, 
has_continuous_action_space, action_std) + + # preTrained weights directory + + random_seed = 0 #### set this to load a particular checkpoint trained on random seed + run_num_pretrained = 0 #### set this to load a particular checkpoint num + + directory = "PPO_preTrained" + '/' + args.env + '/' + data_directory = "new_data" + if not os.path.exists(directory): + print("No directory found") + exit() + if not os.path.exists(data_directory): + os.makedirs(data_directory) + data_directory = data_directory + '/' + args.env + '/' + if not os.path.exists(data_directory): + os.makedirs(data_directory) + + if not run_best_model: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + else: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}best.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + + optimal_data_path = data_directory + "optimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + suboptimal_data_path = data_directory + "suboptimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + print("loading network from : " + checkpoint_path) + + ppo_agent.load(checkpoint_path) + + print("--------------------------------------------------------------------------------------------") + + test_running_reward = 0 + dataload = [] + num_optimal = 1 + num_suboptimal = 1 + for ep in range(1, args.num_episodes+1): + ep_reward = 0 + state = env.reset() + state_dict = {'state' : [], 'action': [], 'reward': [], 'optimal': []} + state_dict['state'].append(state) + for t in range(1, max_ep_len+1): + action = ppo_agent.select_action(state) + print(action.dtype) + state, reward, done, _, _= env.step(action) + ep_reward += reward + state_dict['state'].append(state) + state_dict['action'].append(action) + state_dict['reward'].append(reward) + if args.render: + env.render() + # time.sleep(frame_delay) + if args.use_sleep: + time.sleep(0.05) + if done: + break + + # clear buffer + ppo_agent.buffer.clear() + if num_optimal > args.max_num_samples and args.optimal and not args.suboptimal: + break + if num_suboptimal > args.max_num_samples and args.suboptimal and not args.optimal: + break + if num_optimal > args.max_num_samples and num_suboptimal > args.max_num_samples and args.optimal and args.suboptimal: + break + if ep_reward > args.threshold and args.optimal: + state_dict['optimal'] = [True] * len(state_dict['action']) + num_optimal += 1 + dataload.append(state_dict) + if ep_reward <= args.threshold and args.suboptimal: + state_dict['optimal'] = [False] * len(state_dict['action']) + num_suboptimal += 1 + dataload.append(state_dict) + test_running_reward += ep_reward + print('Episode: {} \t\t Reward: {}'.format(ep, round(ep_reward, 2))) + + ep_reward = 0 + + env.close() + + print("============================================================================================") + + avg_test_reward = test_running_reward / args.num_episodes + avg_test_reward = round(avg_test_reward, 2) + print("average test reward : " + str(avg_test_reward)) + if not args.dontsave and args.optimal: + with open(optimal_data_path, 'wb') as file: + pickle.dump(dataload, file) + print("optimal data saved at : " + optimal_data_path + " with " + str(num_optimal) + " samples") + if not args.dontsave and args.suboptimal: + with open(suboptimal_data_path, 'wb') as file: + pickle.dump(dataload, file) + print("suboptimal data saved at : " + 
suboptimal_data_path + " with " + str(num_suboptimal) + " samples") + print("============================================================================================") + print(type(dataload)) + for i in range(args.num_episodes): + accumulator = 0 + accumulator2 = 0 + initial_state = dataload[i]['state'][0] + env.reset(goal = initial_state[7:9]) + print('goal location', initial_state[7:9]) + env.reset_with_obs(initial_state) + # print("initial state ", initial_state) + # print("initial state as per the model ", env.get_obs()) + assert np.allclose(env.get_obs(), initial_state) + if args.render: + env.render() + for step in range(len(dataload[i]['reward'])): + action = dataload[i]['action'][step] + print(action.dtype) + next_state, reward, done, _, info= env.step(action) + print("next state as per the model ", next_state) + print("next state as per the data ", dataload[i]['state'][step+1]) + try: + assert np.allclose(next_state, dataload[i]['state'][step+1]) + except: + print("the two states are not the same ", next_state, dataload[i]['state'][step+1]) + accumulator += dataload[i]['reward'][step] + accumulator2 += reward + if args.render: + env.render() + if args.use_sleep: + time.sleep(0.05) + time.sleep(0.1) + print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2)) + env.close() + + +if __name__ == '__main__': + + test() diff --git a/imperfect_envs/README.md b/imperfect_envs/README.md new file mode 100644 index 0000000..e69de29 diff --git a/imperfect_envs/driving/__init__.py b/imperfect_envs/driving/__init__.py new file mode 100644 index 0000000..24e6044 --- /dev/null +++ b/imperfect_envs/driving/__init__.py @@ -0,0 +1,9 @@ +from gym.envs.registration import register + +register(id="Continuous-v0", entry_point="driving.envs:GridworldContinuousEnv") +# register(id="ContinuousRandom-v0", entry_point="driving.envs:GridworldContinuousRandomInitEnv") +# register(id="ContinuousRandom1-v0", entry_point="driving.envs:GridworldContinuousRandomInitEnv1",max_episode_steps=400) +# register(id="ContinuousLeftRandom1-v0", entry_point="driving.envs:GridworldContinuousLeftRandomInitEnv1") +# register(id="ContinuousRightRandom1-v0", entry_point="driving.envs:GridworldContinuousRightRandomInitEnv1") +register(id="ContinuousFastRandom-v0", entry_point="driving.envs:GridworldContinuousFastRandomInitEnv",max_episode_steps=400) +register(id="ContinuousSlowRandom-v0", entry_point="driving.envs:GridworldContinuousSlowRandomInitEnv",max_episode_steps=400) diff --git a/imperfect_envs/driving/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/driving/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..09a0de3 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/__init__.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/agents.cpython-310.pyc b/imperfect_envs/driving/__pycache__/agents.cpython-310.pyc new file mode 100644 index 0000000..79617b3 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/agents.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/entities.cpython-310.pyc b/imperfect_envs/driving/__pycache__/entities.cpython-310.pyc new file mode 100644 index 0000000..3298097 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/entities.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/geometry.cpython-310.pyc b/imperfect_envs/driving/__pycache__/geometry.cpython-310.pyc new file mode 100644 index 0000000..8027f5f Binary files /dev/null and 
b/imperfect_envs/driving/__pycache__/geometry.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/graphics.cpython-310.pyc b/imperfect_envs/driving/__pycache__/graphics.cpython-310.pyc new file mode 100644 index 0000000..775b215 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/graphics.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/visualizer.cpython-310.pyc b/imperfect_envs/driving/__pycache__/visualizer.cpython-310.pyc new file mode 100644 index 0000000..7d3898f Binary files /dev/null and b/imperfect_envs/driving/__pycache__/visualizer.cpython-310.pyc differ diff --git a/imperfect_envs/driving/__pycache__/world.cpython-310.pyc b/imperfect_envs/driving/__pycache__/world.cpython-310.pyc new file mode 100644 index 0000000..0ffd222 Binary files /dev/null and b/imperfect_envs/driving/__pycache__/world.cpython-310.pyc differ diff --git a/imperfect_envs/driving/agents.py b/imperfect_envs/driving/agents.py new file mode 100644 index 0000000..afd3c2b --- /dev/null +++ b/imperfect_envs/driving/agents.py @@ -0,0 +1,62 @@ +from driving.entities import RectangleEntity, CircleEntity +from driving.geometry import Point + +# For colors, we use tkinter colors. See http://www.science.smith.edu/dftwiki/index.php/Color_Charts_for_TKinter + + +class Car(RectangleEntity): + def __init__( + self, + center: Point, + heading: float, + color: str = "red", + min_acc: float = -4.0, + max_acc: float = 4.0, + ): + size = Point(2.0, 1.0) + movable = True + friction = 0.06 + super(Car, self).__init__( + center, heading, size, movable, friction, min_acc=min_acc, max_acc=max_acc + ) + self.color = color + self.collidable = True + + +class Pedestrian(CircleEntity): + def __init__(self, center: Point, heading: float, color: str = "LightSalmon2"): + radius = 0.4 + movable = True + friction = 0.2 + super(Pedestrian, self).__init__(center, heading, radius, movable, friction) + self.color = color + self.collidable = True + + +class Building(RectangleEntity): + def __init__(self, center: Point, size: Point, color: str = "gray26", heading=0.0): + movable = False + friction = 0.0 + super(Building, self).__init__(center, heading, size, movable, friction) + self.color = color + self.collidable = True + + +class Painting(RectangleEntity): + def __init__(self, center: Point, size: Point, color: str = "gray26"): + heading = 0.0 + movable = False + friction = 0.0 + super(Painting, self).__init__(center, heading, size, movable, friction) + self.color = color + self.collidable = False + + +class Goal(RectangleEntity): + def __init__(self, center: Point, radius: float, heading: float, color: str = "LightSalmon2"): + size = Point(radius, radius) + movable = True + friction = 0.2 + super(Goal, self).__init__(center, heading, size, movable, friction) + self.color = color + self.collidable = True diff --git a/imperfect_envs/driving/entities.py b/imperfect_envs/driving/entities.py new file mode 100644 index 0000000..aa58c7a --- /dev/null +++ b/imperfect_envs/driving/entities.py @@ -0,0 +1,241 @@ +import math +from typing import Text, Union +import numpy as np +from driving.geometry import Point, Rectangle, Circle +import copy + + +def get_entity_dynamics(friction, min_speed, max_speed, min_acc, max_acc, xnp=np): + # xnp: Either numpy or jax.numpy. 
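+    # The returned closure implements unicycle-style dynamics: steering
+    # induces an angular velocity proportional to speed, friction opposes
+    # acceleration, and position, heading, and speed are each advanced with
+    # a trapezoidal step (averaging the old and new derivatives over dt).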
+ + def entity_dynamics(x, u, dt): + # x: (x, y, xp, yp, theta, ang_vel, acceleration) + # u: (steering angle, acceleration) + center = x[:2] + velocity = x[2:4] + speed = xnp.linalg.norm(velocity, ord=2) + heading = x[4] + angular_velocity = x[5] + old_acceleration = x[6] + steering_angle = u[0] + acceleration = xnp.clip(u[1], min_acc, max_acc) + + new_angular_velocity = speed * steering_angle + new_acceleration = acceleration - friction * speed + + new_heading = heading + (angular_velocity + new_angular_velocity) * dt / 2.0 + new_speed = xnp.clip( + speed + (old_acceleration + new_acceleration) * dt / 2.0, min_speed, max_speed + ) + + next_speed = (speed + new_speed) / 2.0 + next_heading = (new_heading + heading) / 2.0 + new_velocity = next_speed * xnp.array((xnp.cos(next_heading), xnp.sin(next_heading))) + + new_center = center + (velocity + new_velocity) * dt / 2.0 + return xnp.concatenate( + ( + new_center, + new_velocity, + xnp.stack([new_heading, new_angular_velocity, new_acceleration]), + ) + ) + + return entity_dynamics + + +class Entity: + def __init__( + self, + center: Point, + heading: float, + movable: bool = True, + friction: float = 0.0, + min_speed: float = 0.0, + max_speed: float = math.inf, + min_acc: float = -math.inf, + max_acc: float = math.inf, + ): + self.center = center # this is x, y + self.heading = heading + self.movable = movable + self.color = "ghost white" + self.collidable = True + self.obj = None # MUST be set by subclasses. + if movable: + self.friction = friction + self.velocity = Point(0, 0) # this is xp, yp + self.acceleration = 0 # this is vp (or speedp) + self.angular_velocity = 0 # this is headingp + self.inputSteering = 0 + self.inputAcceleration = 0 + self.min_speed = min_speed + self.max_speed = max_speed + self.min_acc = min_acc + self.max_acc = max_acc + self.entity_dynamics = get_entity_dynamics( + friction, self.min_speed, self.max_speed, self.min_acc, self.max_acc, xnp=np + ) + + @property + def speed(self) -> float: + return self.velocity.norm(p=2) if self.movable else 0 + + def set_control(self, inputSteering: float, inputAcceleration: float): + self.inputSteering = inputSteering + self.inputAcceleration = inputAcceleration + + @property + def state(self): + return np.array( + ( + self.x, + self.y, + self.xp, + self.yp, + self.heading, + self.angular_velocity, + self.acceleration, + ) + ) + + @state.setter + def state(self, new_x): + self.center = Point(new_x[0], new_x[1]) + self.velocity = Point(new_x[2], new_x[3]) + self.heading = new_x[4] + self.angular_velocity = new_x[5] + self.acceleration = new_x[6] + self.buildGeometry() + + def tick(self, dt: float): + if self.movable: + x = self.state + u = np.array((self.inputSteering, self.inputAcceleration)) + new_x = self.entity_dynamics(x, u, dt) + self.state = new_x + + def buildGeometry(self): # builds the obj + raise NotImplementedError + + def collidesWith(self, other: Union["Point", "Entity"]) -> bool: + if isinstance(other, Entity): + return self.obj.intersectsWith(other.obj) + elif isinstance(other, Point): + return self.obj.intersectsWith(other) + else: + raise NotImplementedError + + def distanceTo(self, other: Union["Point", "Entity"]) -> float: + if isinstance(other, Entity): + return self.obj.distanceTo(other.obj) + elif isinstance(other, Point): + return self.obj.distanceTo(other) + else: + raise NotImplementedError + + def copy(self): + return copy.deepcopy(self) + + @property + def x(self): + return self.center.x + + @property + def y(self): + return self.center.y + + @property 
+ def xp(self): + return self.velocity.x + + @property + def yp(self): + return self.velocity.y + + +class RectangleEntity(Entity): + def __init__( + self, + center: Point, + heading: float, + size: Point, + movable: bool = True, + friction: float = 0, + **kwargs + ): + super(RectangleEntity, self).__init__(center, heading, movable, friction, **kwargs) + self.size = size + self.buildGeometry() + + @property + def edge_centers(self): + edge_centers = np.zeros((4, 2), dtype=np.float32) + x = self.center.x + y = self.center.y + w = self.size.x + h = self.size.y + edge_centers[0] = [ + x + w / 2.0 * np.cos(self.heading), + y + w / 2.0 * np.sin(self.heading), + ] + edge_centers[1] = [ + x - h / 2.0 * np.sin(self.heading), + y + h / 2.0 * np.cos(self.heading), + ] + edge_centers[2] = [ + x - w / 2.0 * np.cos(self.heading), + y - w / 2.0 * np.sin(self.heading), + ] + edge_centers[3] = [ + x + h / 2.0 * np.sin(self.heading), + y - h / 2.0 * np.cos(self.heading), + ] + return edge_centers + + @property + def corners(self): + ec = self.edge_centers + c = np.array([self.center.x, self.center.y]) + corners = [] + corners.append(Point(*(ec[1] + ec[0] - c))) + corners.append(Point(*(ec[2] + ec[1] - c))) + corners.append(Point(*(ec[3] + ec[2] - c))) + corners.append(Point(*(ec[0] + ec[3] - c))) + return corners + + def buildGeometry(self): + C = self.corners + self.obj = Rectangle(*C[:-1]) # pylint: disable=no-value-for-parameter + + #def distanceTo(self, other): + # return np.linalg.norm(np.array([self.center.x, self.center.y]) - np.array([other.center.x, other.center.y]), ord=1) + + +class CircleEntity(Entity): + def __init__( + self, + center: Point, + heading: float, + radius: float, + movable: bool = True, + friction: float = 0, + **kwargs + ): + super(CircleEntity, self).__init__(center, heading, movable, friction, **kwargs) + self.radius = radius + self.buildGeometry() + + def buildGeometry(self): + self.obj = Circle(self.center, self.radius) + + +class TextEntity(Entity): + def __init__(self, center: Point, **kwargs): + heading = 0 + super(TextEntity, self).__init__(center, heading, movable=False, **kwargs) + self.text = "" + + def buildGeometry(self): + # Represent text geometry as a tiny circle. Not accurate. 
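+        # The 0.01 radius effectively makes the label a point for distance
+        # and intersection queries.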
+ self.obj = Circle(self.center, 0.01) diff --git a/imperfect_envs/driving/envs/__init__.py b/imperfect_envs/driving/envs/__init__.py new file mode 100644 index 0000000..58a2929 --- /dev/null +++ b/imperfect_envs/driving/envs/__init__.py @@ -0,0 +1 @@ +from driving.envs.gridworld_continuous import GridworldContinuousEnv, GridworldContinuousSlowRandomInitEnv, GridworldContinuousFastRandomInitEnv, PidVelPolicy diff --git a/imperfect_envs/driving/envs/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/driving/envs/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..23e7d87 Binary files /dev/null and b/imperfect_envs/driving/envs/__pycache__/__init__.cpython-310.pyc differ diff --git a/imperfect_envs/driving/envs/__pycache__/gridworld_continuous.cpython-310.pyc b/imperfect_envs/driving/envs/__pycache__/gridworld_continuous.cpython-310.pyc new file mode 100644 index 0000000..e0f4083 Binary files /dev/null and b/imperfect_envs/driving/envs/__pycache__/gridworld_continuous.cpython-310.pyc differ diff --git a/imperfect_envs/driving/envs/gridworld_continuous.py b/imperfect_envs/driving/envs/gridworld_continuous.py new file mode 100644 index 0000000..eecd7f6 --- /dev/null +++ b/imperfect_envs/driving/envs/gridworld_continuous.py @@ -0,0 +1,260 @@ +import io +from typing import Text +import gym +from gym import spaces +from PIL import Image +import numpy as np +import scipy.special +from driving.world import World +from driving.entities import TextEntity, Entity +from driving.agents import Car, Building, Goal +from driving.geometry import Point +from typing import Tuple +import sys +import random + +class PidVelPolicy: + """PID controller for H that maintains its initial velocity.""" + + def __init__(self, dt: float, params: Tuple[float, float, float] = (3.0, 1.0, 6.0)): + self._target_vel = None + self.previous_error = 0 + self.integral = 0 + self.errors = [] + self.dt = dt + self.Kp, self.Ki, self.Kd = params + + def action(self, obs): + my_y_dot = obs[3] + if self._target_vel is None: + self._target_vel = my_y_dot + error = self._target_vel - my_y_dot + derivative = (error - self.previous_error) * self.dt + self.integral = self.integral + self.dt * error + acc = self.Kp * error + self.Ki * self.integral + self.Kd * derivative + self.previous_error = error + self.errors.append(error) + return acc + + def reset(self, seed = None): + if seed is not None: + random.seed(seed) + self._target_vel = None + self.previous_error = 0 + self.integral = 0 + self.errors = [] + + def __str__(self): + return "PidVelPolicy({})".format(self.dt) + +class GridworldContinuousEnv(gym.Env): + + def __init__(self, + dt: float = 0.1, + width: int = 30, + height: int = 40, + time_limit: float = 300.0): + super(GridworldContinuousEnv, self).__init__() + self.dt = dt + self.width = width + self.height = height + self.world = World(self.dt, width=width, height=height, ppm=6) + self.accelerate = PidVelPolicy(self.dt) + self.step_num = 0 + self.time_limit = time_limit + self.action_space = spaces.Box( + np.array([-1.]), np.array([1.]), dtype=np.float32 + ) + self.goal_radius = 2. + self.observation_space = spaces.Box(-np.inf, np.inf, shape=(14,)) + self.start = np.array([self.width/2.,self.goal_radius]) + self.goal = np.array([self.width/2., self.height-self.goal_radius]) + self.max_dist = np.linalg.norm(self.goal-self.start,2) + + self.target = [self.height/5., self.height*2./5., self.height*3./5., self.height*4./5., np.inf] + self.obstacle_width = 6. + self.initial_speed = 3. 
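+    # step() takes a 1-D steering command in [-1, 1] (scaled by 0.1 below);
+    # longitudinal acceleration comes from the PidVelPolicy speed controller.
+    # Episodes end when the car leaves the arena (reward -10000), reaches the
+    # goal, or exceeds time_limit, and step() returns the 5-tuple
+    # (obs, reward, done, truncated, info).
+    # Note: PidVelPolicy multiplies its derivative term by dt rather than
+    # dividing by it, so the effective derivative gain absorbs a factor of
+    # dt**2 relative to a textbook discrete PID update.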
+ + def step(self, action: np.ndarray, verbose: bool = False): + self.step_num += 1 + # for i in range(len(action)): + # action[i] = action[i]*0.1 + action = action * 0.1 + # print(action) + car = self.world.dynamic_agents[0] + goal_loc = self.world.dynamic_agents[1] + acc = self.accelerate.action(self._get_obs()) + action = np.append(action, acc) + if self.stop: + action = np.array([0, -5]) + # print(type(car)) + car.set_control(*action) + goal_loc.set_control(0, 0) + self.world.tick() + + reward = self.reward(verbose) + + done = False + if car.y >= self.height or car.y <= 0 or car.x <= 0 or car.x >= self.width: + reward -= 10000 + done = True + if self.step_num >= self.time_limit: + done = True + if self.car.collidesWith(self.goal_obj): + done = True + self.stop = True + #if self.step_num < 6: + # done = False + return self._get_obs(), reward, done, True, {'episode': {'r': reward, 'l': self.step_num}} + + def reset(self, seed = None): + self.world.reset() + self.stop = False + self.target_count = 0 + if seed is not None: + random.seed(seed) + self.buildings = [ + Building(Point(self.width/2., self.height/2.-3), Point(self.obstacle_width,1), "gray80"), + ] + random_dis = random.random()*2. + random_angle = random.random()*2*np.pi + init_x = self.start[0] + random_dis*np.cos(random_angle) + init_y = self.start[1] + random_dis*np.sin(random_angle) + self.car = Car(Point(init_x, init_y), np.pi/2., "blue") + self.car.velocity = Point(0, self.initial_speed) + + self.goal_obj = Goal(Point(self.goal[0], self.goal[1]), self.goal_radius, 0.0) + + for building in self.buildings: + self.world.add(building) + self.world.add(self.car) + self.world.add(self.goal_obj) + + self.last_heading = np.pi / 2 + + self.step_num = 0 + return self._get_obs() + + def reset_with_obs(self, obs): + self.world.reset() + self.stop = False + self.target_count = 0 + + self.buildings = [ + Building(Point(self.width/2., self.height/2.-3), Point(self.obstacle_width,1), "gray80"), + ] + + init_x = (obs[0]/2.+0.5)*self.width + init_y = (obs[1]/2.+0.5)*self.height + self.car = Car(Point(init_x, init_y), np.pi/2., "blue") + self.car.velocity = Point(0, self.initial_speed) + + self.goal_obj = Goal(Point(self.goal[0], self.goal[1]), self.goal_radius, 0.0) + + for building in self.buildings: + self.world.add(building) + self.world.add(self.car) + self.world.add(self.goal_obj) + + self.last_heading = np.pi / 2 + + self.step_num = 0 + return self._get_obs() + + def _get_obs(self): + """ + Get state of car + """ + return_state = np.array(self.world.state) + #print(return_state) + return_state[1] = 2.* ((return_state[1] / self.height) - 0.5) + return_state[0] = 2.* ((return_state[0] / self.width) - 0.5) + return_state[2] /= self.initial_speed + return_state[3] /= self.initial_speed + # print("get_obs return state ", return_state) + return return_state + + def inverse_dynamic(self, state, next_state): + return (next_state[-2] / np.linalg.norm(self.initial_speed*state[2:4], ord=2))/self.dt + + def reward(self, verbose, weight=10.0): + dist_rew = -1. # * (self.car.center.distanceTo(self.goal_obj)/self.max_dist) + coll_rew = 0 + for building in self.buildings: + if self.car.collidesWith(building): + coll_rew = -1000. + break + + goal_rew = 0.0 + if self.car.collidesWith(self.goal_obj) and (not self.stop): + goal_rew = 100. + + extra_rew = 0. + #if self.car.x < self.width / 4.: + # extra_rew = (self.width / 4. - self.car.x)/(self.width/4.) * (-1.) + #elif self.car.x > self.width * 3. / 4.: + # extra_rew = (self.car.x-self.width * 3. 
/ 4.)/(self.width/4.) * (-1.) + + reward = sum([dist_rew, coll_rew, extra_rew, goal_rew]) + if verbose: print("dist reward: ", dist_rew, + "goal reward: ", goal_rew, + "extra reward: ", extra_rew, + "reward: ", reward) + return reward + + def render(self): + self.world.render() + + def set_goal(self, x, y): + self.goal = np.array([x, y]) + +class GridworldContinuousSlowRandomInitEnv(GridworldContinuousEnv): + def reset(self, seed = None): + + if seed is not None: + random.seed(seed) + + self.world.reset() + + self.stop = False + self.target_count = 0 + + self.buildings = [ + Building(Point(self.width/2., self.height/2.-3), Point(self.obstacle_width,1), "gray80"), + ] + + while True: + random_w = random.random() + random_h = random.random() + init_x = self.width/2.-(self.obstacle_width/2.+2.) + random_w*(self.obstacle_width+4.) + init_y = self.goal_radius + (self.height-3*self.goal_radius)*random_h + cond1 = abs(init_x - self.width/2.) < (self.obstacle_width/2.+2.) and init_y-self.height/2. < 3. and init_y-self.height/2.>-13. + slope = ((self.height - self.goal_radius) - (self.height/2.-3))/(self.width/4.) + #print(slope, init_x, ((self.width/4.-abs(init_x - self.width/2.)) * slope + (self.height/2.-3.))) + cond2 = init_y < ((self.width/4.-abs(init_x - self.width/2.)) * slope + (self.height/2.-3.)) + if cond2 and not cond1: + break + init_heading = np.pi/2. # np.arctan2(self.goal[1] - init_y, self.goal[0]-init_x) + self.car = Car(Point(init_x, init_y), init_heading, "blue") + self.car.velocity = Point(0, self.initial_speed) + + self.goal_obj = Goal(Point(self.goal[0], self.goal[1]), self.goal_radius, 0.0) + + for building in self.buildings: + self.world.add(building) + self.world.add(self.car) + self.world.add(self.goal_obj) + + self.last_heading = np.pi / 2 + + self.step_num = 0 + return self._get_obs() + +class GridworldContinuousFastRandomInitEnv(GridworldContinuousSlowRandomInitEnv): + def __init__(self, + dt: float = 0.1, + width: int = 30, + height: int = 40, + time_limit: float = 300.0): + super(GridworldContinuousFastRandomInitEnv, self).__init__(dt, width, height, time_limit) + self.initial_speed = 9. 
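For reference, a minimal rollout sketch for the environments registered above. Assumptions: the `driving` package is importable so the gym IDs resolve, the gym version matches the one used by demo_code.py (reset() returns only the observation, step() returns a 5-tuple), and a random action stands in for the trained PPO policy:

    import gym
    import driving  # noqa: F401 -- registers ContinuousFastRandom-v0 / ContinuousSlowRandom-v0

    env = gym.make("ContinuousFastRandom-v0")
    env.set_goal(15, 38)                    # goal used by the demo_code.py defaults
    obs = env.reset()
    done, ep_reward = False, 0.0
    while not done:
        action = env.action_space.sample()  # placeholder for ppo_agent.select_action(obs)
        obs, reward, done, truncated, info = env.step(action)
        ep_reward += reward
    env.close()
    print("episode reward:", round(ep_reward, 2))
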
diff --git a/imperfect_envs/driving/geometry.py b/imperfect_envs/driving/geometry.py new file mode 100644 index 0000000..196b0fa --- /dev/null +++ b/imperfect_envs/driving/geometry.py @@ -0,0 +1,243 @@ +import numpy as np +from typing import Union + + +class Point: + def __init__(self, x: float, y: float): + self.x = float(x) + self.y = float(y) + + def __str__(self): + return "Point(" + str(self.x) + ", " + str(self.y) + ")" + + def __add__(self, other: "Point") -> "Point": + return Point(self.x + other.x, self.y + other.y) + + def __sub__(self, other: "Point") -> "Point": + return Point(self.x - other.x, self.y - other.y) + + def norm(self, p: int = 2) -> float: + return (self.x ** p + self.y ** p) ** (1.0 / p) + + def dot(self, other: "Point") -> float: + return self.x * other.x + self.y * other.y + + def __mul__(self, other: float) -> "Point": + return Point(other * self.x, other * self.y) + + def __rmul__(self, other: float) -> "Point": + return self.__mul__(other) + + def __truediv__(self, other: float) -> "Point": + return self.__mul__(1.0 / other) + + def isInside(self, other: Union["Line", "Rectangle", "Circle"]) -> bool: + if isinstance(other, Line): + AM = Line(other.p1, self) + MB = Line(self, other.p2) + return np.isclose(np.abs(AM.dot(MB)), AM.length * MB.length) + + elif isinstance(other, Rectangle): + # Based on https://stackoverflow.com/a/2763387 + AB = Line(other.c1, other.c2) + AM = Line(other.c1, self) + BC = Line(other.c2, other.c3) + BM = Line(other.c2, self) + + return 0 <= AB.dot(AM) <= AB.dot(AB) and 0 <= BC.dot(BM) <= BC.dot(BC) + + elif isinstance(other, Circle): + return (self - other.m).norm(p=2) <= other.r + + raise NotImplementedError + + def distanceTo(self, other: Union["Point", "Line", "Rectangle", "Circle"]) -> float: + if isinstance(other, Point): + return (self - other).norm(p=2) + + elif isinstance(other, Line): + # Based on https://math.stackexchange.com/a/330329 + s2_minus_s1 = other.p2 - other.p1 + that = (self - other.p1).dot(s2_minus_s1) / s2_minus_s1.dot(s2_minus_s1) + tstar = np.minimum(1, np.maximum(0, that)) + return (other.p1 + tstar * s2_minus_s1 - self).norm(p=2) + + elif isinstance(other, Rectangle): + if self.isInside(other): + return 0 + E = other.edges + return np.min([self.distanceTo(e) for e in E]) + + elif isinstance(other, Circle): + return np.maximum(0, self.distanceTo(other.m) - other.r) + + else: + try: + return other.distanceTo(self) + except NameError: + raise NotImplementedError + print("Something went wrong!") + + +def onSegment(p: Point, q: Point, r: Point) -> bool: + """ + Given three colinear points p, q, r, the function checks if + point q lies on line segment 'pr' + """ + return ( + q.x <= np.maximum(p.x, r.x) + and q.x >= np.minimum(p.x, r.x) + and q.y <= np.maximum(p.y, r.y) + and q.y >= np.minimum(p.y, r.y) + ) + + +def orientation(p: Point, q: Point, r: Point) -> int: + """ + To find orientation of ordered triplet (p, q, r). + The function returns following values + 0 --> p, q and r are colinear + 1 --> Clockwise + 2 --> Counterclockwise + """ + # See https://www.geeksforgeeks.org/orientation-3-ordered-points/ for details of below formula. 
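+    # val is the negative z-component of the 2-D cross product (q - p) x (r - q):
+    # val > 0 means a clockwise turn, val < 0 counter-clockwise, 0 collinear.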
+ val = (q.y - p.y) * (r.x - q.x) - (q.x - p.x) * (r.y - q.y) + if val == 0: + return 0 # colinear + return 1 if val > 0 else 2 # clock or counterclock wise + + +class Line: + def __init__(self, p1: Point, p2: Point): + self.p1 = p1 + self.p2 = p2 + + def __str__(self): + return "Line(" + str(self.p1) + ", " + str(self.p2) + ")" + + def intersectsWith(self, other: Union["Line", "Rectangle", "Circle"]): + if isinstance(other, Line): + p1 = self.p1 + q1 = self.p2 + p2 = other.p1 + q2 = other.p2 + + # Based on https://www.geeksforgeeks.org/check-if-two-given-line-segments-intersect/ + # Find the four orientations needed for general and special cases + o1 = orientation(p1, q1, p2) + o2 = orientation(p1, q1, q2) + o3 = orientation(p2, q2, p1) + o4 = orientation(p2, q2, q1) + + # General case + if o1 != o2 and o3 != o4: + return True + + # Special Cases + # p1, q1 and p2 are colinear and p2 lies on segment p1q1 + if o1 == 0 and onSegment(p1, p2, q1): + return True + + # p1, q1 and q2 are colinear and q2 lies on segment p1q1 + if o2 == 0 and onSegment(p1, q2, q1): + return True + + # p2, q2 and p1 are colinear and p1 lies on segment p2q2 + if o3 == 0 and onSegment(p2, p1, q2): + return True + + # p2, q2 and q1 are colinear and q1 lies on segment p2q2 + if o4 == 0 and onSegment(p2, q1, q2): + return True + + return False # Doesn't fall in any of the above cases + + elif isinstance(other, Rectangle): + if self.p1.isInside(other) or self.p2.isInside(other): + return True + E = other.edges + for edge in E: + if self.intersectsWith(edge): + return True + return False + + elif isinstance(other, Circle): + return other.m.distanceTo(self) <= other.r + + raise NotImplementedError + + @property + def length(self): + return self.p1.distanceTo(self.p2) + + def dot(self, other: "Line") -> float: # assumes Line is a vector from p1 to p2 + v1 = self.p2 - self.p1 + v2 = other.p2 - other.p1 + return v1.dot(v2) + + def distanceTo(self, other: "Point") -> float: + if isinstance(other, Point): + return other.distanceTo(self) + + +class Rectangle: + # 3 points are enough to represent a rectangle + def __init__(self, c1: Point, c2: Point, c3: Point): + self.c1 = c1 + self.c2 = c2 + self.c3 = c3 + self.c4 = c3 + c1 - c2 + + def __str__(self): + return "Rectangle({}, {}, {}, {})".format(self.c1, self.c2, self.c3, self.c4) + + @property + def edges(self): + e1 = Line(self.c1, self.c2) + e2 = Line(self.c2, self.c3) + e3 = Line(self.c3, self.c4) + e4 = Line(self.c4, self.c1) + return [e1, e2, e3, e4] + + @property + def corners(self): + return [self.c1, self.c2, self.c3, self.c4] + + def intersectsWith(self, other: Union["Line", "Rectangle", "Circle"]) -> bool: + if isinstance(other, Line): + return other.intersectsWith(self) + + elif isinstance(other, Rectangle) or isinstance(other, Circle): + E = self.edges + for e in E: + if e.intersectsWith(other): + return True + return False + + raise NotImplementedError + + def distanceTo(self, other: "Point") -> float: + if isinstance(other, Point): + return other.distanceTo(self) + + +class Circle: + def __init__(self, m: Point, r: float): + self.m = m + self.r = r + + def __str__(self): + return "Circle(" + str(self.m) + ", radius = " + str(self.r) + ")" + + def intersectsWith(self, other: Union["Line", "Rectangle", "Circle"]): + if isinstance(other, Line) or isinstance(other, Rectangle): + return other.intersectsWith(self) + + elif isinstance(other, Circle): + return self.m.distanceTo(other.m) <= self.r + other.r + + raise NotImplementedError + + def distanceTo(self, other: 
"Point") -> float: + if isinstance(other, Point): + return other.distanceTo(self) diff --git a/imperfect_envs/driving/graphics.py b/imperfect_envs/driving/graphics.py new file mode 100644 index 0000000..4207252 --- /dev/null +++ b/imperfect_envs/driving/graphics.py @@ -0,0 +1,904 @@ +# graphics.py +"""Simple object oriented graphics library + +The library is designed to make it very easy for novice programmers to +experiment with computer graphics in an object oriented fashion. It is +written by John Zelle for use with the book "Python Programming: An +Introduction to Computer Science" (Franklin, Beedle & Associates). + +LICENSE: This is open-source software released under the terms of the +GPL (http://www.gnu.org/licenses/gpl.html). + +PLATFORMS: The package is a wrapper around Tkinter and should run on +any platform where Tkinter is available. + +INSTALLATION: Put this file somewhere where Python can see it. + +OVERVIEW: There are two kinds of objects in the library. The GraphWin +class implements a window where drawing can be done and various +GraphicsObjects are provided that can be drawn into a GraphWin. As a +simple example, here is a complete program to draw a circle of radius +10 centered in a 100x100 window: + +-------------------------------------------------------------------- +from graphics import * + +def main(): + win = GraphWin("My Circle", 100, 100) + c = Circle(Point(50,50), 10) + c.draw(win) + win.getMouse() # Pause to view result + win.close() # Close window when done + +main() +-------------------------------------------------------------------- +GraphWin objects support coordinate transformation through the +setCoords method and pointer-based input through getMouse. + +The library provides the following graphical objects: + Point + Line + Circle + Oval + Rectangle + Polygon + Text + Entry (for text-based input) + Image + +Various attributes of graphical objects can be set such as +outline-color, fill-color and line-width. Graphical objects also +support moving and hiding for animation effects. + +The library also provides a very simple class for pixel-based image +manipulation, Pixmap. A pixmap can be loaded from a file and displayed +using an Image object. Both getPixel and setPixel methods are provided +for manipulating the image. + +DOCUMENTATION: For complete documentation, see Chapter 4 of "Python +Programming: An Introduction to Computer Science" by John Zelle, +published by Franklin, Beedle & Associates. Also see +http://mcsp.wartburg.edu/zelle/python for a quick reference""" + +# Version 4.2 5/26/2011 +# * Modified Image to allow multiple undraws like other GraphicsObjects +# Version 4.1 12/29/2009 +# * Merged Pixmap and Image class. Old Pixmap removed, use Image. +# Version 4.0.1 10/08/2009 +# * Modified the autoflush on GraphWin to default to True +# * Autoflush check on close, setBackground +# * Fixed getMouse to flush pending clicks at entry +# Version 4.0 08/2009 +# * Reverted to non-threaded version. The advantages (robustness, +# efficiency, ability to use with other Tk code, etc.) outweigh +# the disadvantage that interactive use with IDLE is slightly more +# cumbersome. +# * Modified to run in either Python 2.x or 3.x (same file). +# * Added Image.getPixmap() +# * Added update() -- stand alone function to cause any pending +# graphics changes to display. +# +# Version 3.4 10/16/07 +# Fixed GraphicsError to avoid "exploded" error messages. 
+# Version 3.3 8/8/06 +# Added checkMouse method to GraphWin +# Version 3.2.3 +# Fixed error in Polygon init spotted by Andrew Harrington +# Fixed improper threading in Image constructor +# Version 3.2.2 5/30/05 +# Cleaned up handling of exceptions in Tk thread. The graphics package +# now raises an exception if attempt is made to communicate with +# a dead Tk thread. +# Version 3.2.1 5/22/05 +# Added shutdown function for tk thread to eliminate race-condition +# error "chatter" when main thread terminates +# Renamed various private globals with _ +# Version 3.2 5/4/05 +# Added Pixmap object for simple image manipulation. +# Version 3.1 4/13/05 +# Improved the Tk thread communication so that most Tk calls +# do not have to wait for synchonization with the Tk thread. +# (see _tkCall and _tkExec) +# Version 3.0 12/30/04 +# Implemented Tk event loop in separate thread. Should now work +# interactively with IDLE. Undocumented autoflush feature is +# no longer necessary. Its default is now False (off). It may +# be removed in a future version. +# Better handling of errors regarding operations on windows that +# have been closed. +# Addition of an isClosed method to GraphWindow class. + +# Version 2.2 8/26/04 +# Fixed cloning bug reported by Joseph Oldham. +# Now implements deep copy of config info. +# Version 2.1 1/15/04 +# Added autoflush option to GraphWin. When True (default) updates on +# the window are done after each action. This makes some graphics +# intensive programs sluggish. Turning off autoflush causes updates +# to happen during idle periods or when flush is called. +# Version 2.0 +# Updated Documentation +# Made Polygon accept a list of Points in constructor +# Made all drawing functions call TK update for easier animations +# and to make the overall package work better with +# Python 2.3 and IDLE 1.0 under Windows (still some issues). +# Removed vestigial turtle graphics. +# Added ability to configure font for Entry objects (analogous to Text) +# Added setTextColor for Text as an alias of setFill +# Changed to class-style exceptions +# Fixed cloning of Text objects + +# Version 1.6 +# Fixed Entry so StringVar uses _root as master, solves weird +# interaction with shell in Idle +# Fixed bug in setCoords. X and Y coordinates can increase in +# "non-intuitive" direction. +# Tweaked wm_protocol so window is not resizable and kill box closes. + +# Version 1.5 +# Fixed bug in Entry. Can now define entry before creating a +# GraphWin. All GraphWins are now toplevel windows and share +# a fixed root (called _root). + +# Version 1.4 +# Fixed Garbage collection of Tkinter images bug. +# Added ability to set text atttributes. +# Added Entry boxes. + +import time, os, sys + +try: # import as appropriate for 2.x vs. 
3.x
+    import tkinter as tk
+except:
+    import Tkinter as tk
+
+
+##########################################################################
+# Module Exceptions
+
+
+class GraphicsError(Exception):
+    """Generic error class for graphics module exceptions."""
+
+    pass
+
+
+OBJ_ALREADY_DRAWN = "Object currently drawn"
+UNSUPPORTED_METHOD = "Object doesn't support operation"
+BAD_OPTION = "Illegal option value"
+DEAD_THREAD = "Graphics thread quit unexpectedly"
+
+try:
+    _root = tk.Tk()
+    _root.withdraw()
+except:
+    _root = None
+
+
+def update():
+    _root.update()
+
+
+############################################################################
+# Graphics classes start here
+
+
+class GraphWin(tk.Canvas):
+
+    """A GraphWin is a toplevel window for displaying graphics."""
+
+    def __init__(self, title="Graphics Window", width=200, height=200, autoflush=True):
+        master = tk.Toplevel(_root)
+        master.protocol("WM_DELETE_WINDOW", self.close)
+        tk.Canvas.__init__(self, master, width=width, height=height)
+        self.master.title(title)
+        self.pack()
+        master.resizable(0, 0)
+        self.foreground = "black"
+        self.items = []
+        self.mouseX = None
+        self.mouseY = None
+        self.bind("<Button-1>", self._onClick)
+        self.height = height
+        self.width = width
+        self.autoflush = autoflush
+        self._mouseCallback = None
+        self.trans = None
+        self.closed = False
+        master.lift()
+        if autoflush:
+            _root.update()
+
+    def __checkOpen(self):
+        if self.closed:
+            raise GraphicsError("window is closed")
+
+    def setBackground(self, color):
+        """Set background color of the window"""
+        self.__checkOpen()
+        self.config(bg=color)
+        self.__autoflush()
+
+    def setCoords(self, x1, y1, x2, y2):
+        """Set coordinates of window to run from (x1,y1) in the
+        lower-left corner to (x2,y2) in the upper-right corner."""
+        self.trans = Transform(self.width, self.height, x1, y1, x2, y2)
+
+    def close(self):
+        """Close the window"""
+
+        if self.closed:
+            return
+        self.closed = True
+        self.master.destroy()
+        self.__autoflush()
+
+    def isClosed(self):
+        return self.closed
+
+    def isOpen(self):
+        return not self.closed
+
+    def __autoflush(self):
+        if self.autoflush:
+            _root.update()
+
+    def plot(self, x, y, color="black"):
+        """Set pixel (x,y) to the given color"""
+        self.__checkOpen()
+        xs, ys = self.toScreen(x, y)
+        self.create_line(xs, ys, xs + 1, ys, fill=color)
+        self.__autoflush()
+
+    def plotPixel(self, x, y, color="black"):
+        """Set pixel raw (independent of window coordinates) pixel
+        (x,y) to color"""
+        self.__checkOpen()
+        self.create_line(x, y, x + 1, y, fill=color)
+        self.__autoflush()
+
+    def flush(self):
+        """Update drawing to the window"""
+        self.__checkOpen()
+        self.update_idletasks()
+
+    def getMouse(self):
+        """Wait for mouse click and return Point object representing
+        the click"""
+        self.update()  # flush any prior clicks
+        self.mouseX = None
+        self.mouseY = None
+        while self.mouseX == None or self.mouseY == None:
+            self.update()
+            if self.isClosed():
+                raise GraphicsError("getMouse in closed window")
+            time.sleep(0.1)  # give up thread
+        x, y = self.toWorld(self.mouseX, self.mouseY)
+        self.mouseX = None
+        self.mouseY = None
+        return Point(x, y)
+
+    def checkMouse(self):
+        """Return last mouse click or None if mouse has
+        not been clicked since last call"""
+        if self.isClosed():
+            raise GraphicsError("checkMouse in closed window")
+        self.update()
+        if self.mouseX != None and self.mouseY != None:
+            x, y = self.toWorld(self.mouseX, self.mouseY)
+            self.mouseX = None
+            self.mouseY = None
+            return Point(x, y)
+        else:
+            return None
+
+    def 
getHeight(self): + """Return the height of the window""" + return self.height + + def getWidth(self): + """Return the width of the window""" + return self.width + + def toScreen(self, x, y): + trans = self.trans + if trans: + return self.trans.screen(x, y) + else: + return x, y + + def toWorld(self, x, y): + trans = self.trans + if trans: + return self.trans.world(x, y) + else: + return x, y + + def setMouseHandler(self, func): + self._mouseCallback = func + + def _onClick(self, e): + self.mouseX = e.x + self.mouseY = e.y + if self._mouseCallback: + self._mouseCallback(Point(e.x, e.y)) + + +class Transform: + + """Internal class for 2-D coordinate transformations""" + + def __init__(self, w, h, xlow, ylow, xhigh, yhigh): + # w, h are width and height of window + # (xlow,ylow) coordinates of lower-left [raw (0,h-1)] + # (xhigh,yhigh) coordinates of upper-right [raw (w-1,0)] + xspan = xhigh - xlow + yspan = yhigh - ylow + self.xbase = xlow + self.ybase = yhigh + self.xscale = xspan / float(w - 1) + self.yscale = yspan / float(h - 1) + + def screen(self, x, y): + # Returns x,y in screen (actually window) coordinates + xs = (x - self.xbase) / self.xscale + ys = (self.ybase - y) / self.yscale + return int(xs + 0.5), int(ys + 0.5) + + def world(self, xs, ys): + # Returns xs,ys in world coordinates + x = xs * self.xscale + self.xbase + y = self.ybase - ys * self.yscale + return x, y + + +# Default values for various item configuration options. Only a subset of +# keys may be present in the configuration dictionary for a given item +DEFAULT_CONFIG = { + "fill": "", + "outline": "black", + "width": "1", + "arrow": "none", + "text": "", + "justify": "center", + "font": ("helvetica", 12, "normal"), +} + + +class GraphicsObject: + + """Generic base class for all of the drawable objects""" + + # A subclass of GraphicsObject should override _draw and + # and _move methods. + + def __init__(self, options): + # options is a list of strings indicating which options are + # legal for this object. + + # When an object is drawn, canvas is set to the GraphWin(canvas) + # object where it is drawn and id is the TK identifier of the + # drawn shape. + self.canvas = None + self.id = None + + # config is the dictionary of configuration options for the widget. + config = {} + for option in options: + config[option] = DEFAULT_CONFIG[option] + self.config = config + + def setFill(self, color): + """Set interior color to color""" + self._reconfig("fill", color) + + def setOutline(self, color): + """Set outline color to color""" + self._reconfig("outline", color) + + def setWidth(self, width): + """Set line weight to width""" + self._reconfig("width", width) + + def draw(self, graphwin): + + """Draw the object in graphwin, which should be a GraphWin + object. A GraphicsObject may only be drawn into one + window. Raises an error if attempt made to draw an object that + is already visible.""" + + if self.canvas and not self.canvas.isClosed(): + raise GraphicsError(OBJ_ALREADY_DRAWN) + if graphwin.isClosed(): + raise GraphicsError("Can't draw to closed window") + self.canvas = graphwin + self.id = self._draw(graphwin, self.config) + if graphwin.autoflush: + _root.update() + + def undraw(self): + + """Undraw the object (i.e. hide it). 
Returns silently if the + object is not currently drawn.""" + + if not self.canvas: + return + if not self.canvas.isClosed(): + self.canvas.delete(self.id) + if self.canvas.autoflush: + _root.update() + self.canvas = None + self.id = None + + def move(self, dx, dy): + + """move object dx units in x direction and dy units in y + direction""" + + self._move(dx, dy) + canvas = self.canvas + if canvas and not canvas.isClosed(): + trans = canvas.trans + if trans: + x = dx / trans.xscale + y = -dy / trans.yscale + else: + x = dx + y = dy + self.canvas.move(self.id, x, y) + if canvas.autoflush: + _root.update() + + def _reconfig(self, option, setting): + # Internal method for changing configuration of the object + # Raises an error if the option does not exist in the config + # dictionary for this object + if option not in self.config: + raise GraphicsError(UNSUPPORTED_METHOD) + options = self.config + options[option] = setting + if self.canvas and not self.canvas.isClosed(): + self.canvas.itemconfig(self.id, options) + if self.canvas.autoflush: + _root.update() + + def _draw(self, canvas, options): + """draws appropriate figure on canvas with options provided + Returns Tk id of item drawn""" + pass # must override in subclass + + def _move(self, dx, dy): + """updates internal state of object to move it dx,dy units""" + pass # must override in subclass + + +class Point(GraphicsObject): + def __init__(self, x, y): + GraphicsObject.__init__(self, ["outline", "fill"]) + self.setFill = self.setOutline + self.x = x + self.y = y + + def _draw(self, canvas, options): + x, y = canvas.toScreen(self.x, self.y) + return canvas.create_rectangle(x, y, x + 1, y + 1, options) + + def _move(self, dx, dy): + self.x = self.x + dx + self.y = self.y + dy + + def clone(self): + other = Point(self.x, self.y) + other.config = self.config.copy() + return other + + def getX(self): + return self.x + + def getY(self): + return self.y + + +class _BBox(GraphicsObject): + # Internal base class for objects represented by bounding box + # (opposite corners) Line segment is a degenerate case. 
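+    # Rectangle, Oval, and Line subclass _BBox; it centralizes the two corner
+    # points, movement, and center computation, while each subclass supplies
+    # its own _draw.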
+ + def __init__(self, p1, p2, options=["outline", "width", "fill"]): + GraphicsObject.__init__(self, options) + self.p1 = p1.clone() + self.p2 = p2.clone() + + def _move(self, dx, dy): + self.p1.x = self.p1.x + dx + self.p1.y = self.p1.y + dy + self.p2.x = self.p2.x + dx + self.p2.y = self.p2.y + dy + + def getP1(self): + return self.p1.clone() + + def getP2(self): + return self.p2.clone() + + def getCenter(self): + p1 = self.p1 + p2 = self.p2 + return Point((p1.x + p2.x) / 2.0, (p1.y + p2.y) / 2.0) + + +class Rectangle(_BBox): + def __init__(self, p1, p2): + _BBox.__init__(self, p1, p2) + + def _draw(self, canvas, options): + p1 = self.p1 + p2 = self.p2 + x1, y1 = canvas.toScreen(p1.x, p1.y) + x2, y2 = canvas.toScreen(p2.x, p2.y) + return canvas.create_rectangle(x1, y1, x2, y2, options) + + def clone(self): + other = Rectangle(self.p1, self.p2) + other.config = self.config.copy() + return other + + +class Oval(_BBox): + def __init__(self, p1, p2): + _BBox.__init__(self, p1, p2) + + def clone(self): + other = Oval(self.p1, self.p2) + other.config = self.config.copy() + return other + + def _draw(self, canvas, options): + p1 = self.p1 + p2 = self.p2 + x1, y1 = canvas.toScreen(p1.x, p1.y) + x2, y2 = canvas.toScreen(p2.x, p2.y) + return canvas.create_oval(x1, y1, x2, y2, options) + + +class Circle(Oval): + def __init__(self, center, radius): + p1 = Point(center.x - radius, center.y - radius) + p2 = Point(center.x + radius, center.y + radius) + Oval.__init__(self, p1, p2) + self.radius = radius + + def clone(self): + other = Circle(self.getCenter(), self.radius) + other.config = self.config.copy() + return other + + def getRadius(self): + return self.radius + + +class Line(_BBox): + def __init__(self, p1, p2): + _BBox.__init__(self, p1, p2, ["arrow", "fill", "width"]) + self.setFill(DEFAULT_CONFIG["outline"]) + self.setOutline = self.setFill + + def clone(self): + other = Line(self.p1, self.p2) + other.config = self.config.copy() + return other + + def _draw(self, canvas, options): + p1 = self.p1 + p2 = self.p2 + x1, y1 = canvas.toScreen(p1.x, p1.y) + x2, y2 = canvas.toScreen(p2.x, p2.y) + return canvas.create_line(x1, y1, x2, y2, options) + + def setArrow(self, option): + if not option in ["first", "last", "both", "none"]: + raise GraphicsError(BAD_OPTION) + self._reconfig("arrow", option) + + +class Polygon(GraphicsObject): + def __init__(self, *points): + # if points passed as a list, extract it + if len(points) == 1 and type(points[0]) == type([]): + points = points[0] + self.points = list(map(Point.clone, points)) + GraphicsObject.__init__(self, ["outline", "width", "fill"]) + + def clone(self): + other = Polygon(*self.points) + other.config = self.config.copy() + return other + + def getPoints(self): + return list(map(Point.clone, self.points)) + + def _move(self, dx, dy): + for p in self.points: + p.move(dx, dy) + + def _draw(self, canvas, options): + args = [canvas] + for p in self.points: + x, y = canvas.toScreen(p.x, p.y) + args.append(x) + args.append(y) + args.append(options) + return GraphWin.create_polygon(*args) + + +class Text(GraphicsObject): + def __init__(self, p, text): + GraphicsObject.__init__(self, ["justify", "fill", "text", "font"]) + self.setText(text) + self.anchor = p.clone() + self.setFill(DEFAULT_CONFIG["outline"]) + self.setOutline = self.setFill + + def _draw(self, canvas, options): + p = self.anchor + x, y = canvas.toScreen(p.x, p.y) + return canvas.create_text(x, y, options) + + def _move(self, dx, dy): + self.anchor.move(dx, dy) + + def clone(self): + other 
= Text(self.anchor, self.config["text"]) + other.config = self.config.copy() + return other + + def setText(self, text): + self._reconfig("text", text) + + def getText(self): + return self.config["text"] + + def getAnchor(self): + return self.anchor.clone() + + def setFace(self, face): + if face in ["helvetica", "arial", "courier", "times roman"]: + f, s, b = self.config["font"] + self._reconfig("font", (face, s, b)) + else: + raise GraphicsError(BAD_OPTION) + + def setSize(self, size): + if 5 <= size <= 36: + f, s, b = self.config["font"] + self._reconfig("font", (f, size, b)) + else: + raise GraphicsError(BAD_OPTION) + + def setStyle(self, style): + if style in ["bold", "normal", "italic", "bold italic"]: + f, s, b = self.config["font"] + self._reconfig("font", (f, s, style)) + else: + raise GraphicsError(BAD_OPTION) + + def setTextColor(self, color): + self.setFill(color) + + +class Entry(GraphicsObject): + def __init__(self, p, width): + GraphicsObject.__init__(self, []) + self.anchor = p.clone() + # print self.anchor + self.width = width + self.text = tk.StringVar(_root) + self.text.set("") + self.fill = "gray" + self.color = "black" + self.font = DEFAULT_CONFIG["font"] + self.entry = None + + def _draw(self, canvas, options): + p = self.anchor + x, y = canvas.toScreen(p.x, p.y) + frm = tk.Frame(canvas.master) + self.entry = tk.Entry( + frm, + width=self.width, + textvariable=self.text, + bg=self.fill, + fg=self.color, + font=self.font, + ) + self.entry.pack() + # self.setFill(self.fill) + return canvas.create_window(x, y, window=frm) + + def getText(self): + return self.text.get() + + def _move(self, dx, dy): + self.anchor.move(dx, dy) + + def getAnchor(self): + return self.anchor.clone() + + def clone(self): + other = Entry(self.anchor, self.width) + other.config = self.config.copy() + other.text = tk.StringVar() + other.text.set(self.text.get()) + other.fill = self.fill + return other + + def setText(self, t): + self.text.set(t) + + def setFill(self, color): + self.fill = color + if self.entry: + self.entry.config(bg=color) + + def _setFontComponent(self, which, value): + font = list(self.font) + font[which] = value + self.font = tuple(font) + if self.entry: + self.entry.config(font=self.font) + + def setFace(self, face): + if face in ["helvetica", "arial", "courier", "times roman"]: + self._setFontComponent(0, face) + else: + raise GraphicsError(BAD_OPTION) + + def setSize(self, size): + if 5 <= size <= 36: + self._setFontComponent(1, size) + else: + raise GraphicsError(BAD_OPTION) + + def setStyle(self, style): + if style in ["bold", "normal", "italic", "bold italic"]: + self._setFontComponent(2, style) + else: + raise GraphicsError(BAD_OPTION) + + def setTextColor(self, color): + self.color = color + if self.entry: + self.entry.config(fg=color) + + +class Image(GraphicsObject): + + idCount = 0 + imageCache = {} # tk photoimages go here to avoid GC while drawn + + def __init__(self, p, *pixmap): + GraphicsObject.__init__(self, []) + self.anchor = p.clone() + self.imageId = Image.idCount + Image.idCount = Image.idCount + 1 + if len(pixmap) == 1: # file name provided + self.img = tk.PhotoImage(file=pixmap[0], master=_root) + else: # width and height provided + width, height = pixmap + self.img = tk.PhotoImage(master=_root, width=width, height=height) + + def _draw(self, canvas, options): + p = self.anchor + x, y = canvas.toScreen(p.x, p.y) + self.imageCache[self.imageId] = self.img # save a reference + return canvas.create_image(x, y, image=self.img) + + def _move(self, dx, dy): + 
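+        # update only the stored anchor point; the on-screen canvas item is repositioned by the shared GraphicsObject.move() logic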
self.anchor.move(dx, dy)
+
+    def undraw(self):
+        try:
+            del self.imageCache[self.imageId]  # allow gc of tk photoimage
+        except KeyError:
+            pass
+        GraphicsObject.undraw(self)
+
+    def getAnchor(self):
+        return self.anchor.clone()
+
+    def clone(self):
+        other = Image(Point(0, 0), 0, 0)
+        other.img = self.img.copy()
+        other.anchor = self.anchor.clone()
+        other.config = self.config.copy()
+        return other
+
+    def getWidth(self):
+        """Returns the width of the image in pixels"""
+        return self.img.width()
+
+    def getHeight(self):
+        """Returns the height of the image in pixels"""
+        return self.img.height()
+
+    def getPixel(self, x, y):
+        """Returns a list [r,g,b] with the RGB color values for pixel (x,y)
+        r,g,b are in range(256)
+
+        """
+
+        value = self.img.get(x, y)
+        if type(value) == type(0):
+            return [value, value, value]
+        else:
+            return list(map(int, value.split()))
+
+    def setPixel(self, x, y, color):
+        """Sets pixel (x,y) to the given color
+
+        """
+        self.img.put("{" + color + "}", (x, y))
+
+    def save(self, filename):
+        """Saves the pixmap image to filename.
+        The format for the saved image is determined from the filename extension.
+
+        """
+
+        path, name = os.path.split(filename)
+        ext = name.split(".")[-1]
+        self.img.write(filename, format=ext)
+
+
+def color_rgb(r, g, b):
+    """r,g,b are intensities of red, green, and blue in range(256)
+    Returns color specifier string for the resulting color"""
+    return "#%02x%02x%02x" % (r, g, b)
+
+
+def test():
+    win = GraphWin()
+    win.setCoords(0, 0, 10, 10)
+    t = Text(Point(5, 5), "Centered Text")
+    t.draw(win)
+    p = Polygon(Point(1, 1), Point(5, 3), Point(2, 7))
+    p.draw(win)
+    e = Entry(Point(5, 6), 10)
+    e.draw(win)
+    win.getMouse()
+    p.setFill("red")
+    p.setOutline("blue")
+    p.setWidth(2)
+    s = ""
+    for pt in p.getPoints():
+        s = s + "(%0.1f,%0.1f) " % (pt.getX(), pt.getY())
+    t.setText(e.getText())
+    e.setFill("green")
+    e.setText("Spam!")
+    e.move(2, 0)
+    win.getMouse()
+    p.move(2, 3)
+    s = ""
+    for pt in p.getPoints():
+        s = s + "(%0.1f,%0.1f) " % (pt.getX(), pt.getY())
+    t.setText(s)
+    win.getMouse()
+    p.undraw()
+    e.undraw()
+    t.setStyle("bold")
+    win.getMouse()
+    t.setStyle("normal")
+    win.getMouse()
+    t.setStyle("italic")
+    win.getMouse()
+    t.setStyle("bold italic")
+    win.getMouse()
+    t.setSize(14)
+    win.getMouse()
+    t.setFace("arial")
+    t.setSize(20)
+    win.getMouse()
+    win.close()
+
+
+if __name__ == "__main__":
+    test()
diff --git a/imperfect_envs/driving/visualizer.py b/imperfect_envs/driving/visualizer.py
new file mode 100644
index 0000000..d7c3b1f
--- /dev/null
+++ b/imperfect_envs/driving/visualizer.py
@@ -0,0 +1,82 @@
+from driving.graphics import *
+from driving.entities import RectangleEntity, CircleEntity, TextEntity
+
+
+
+class Visualizer:
+    def __init__(self, width: float, height: float, ppm: int):
+        # width (meters)
+        # height (meters)
+        # ppm is the number of pixels per meter
+
+        self.ppm = ppm
+        self.display_width, self.display_height = int(width * ppm), int(height * ppm)
+        self.window_created = False
+        self.visualized_imgs = []
+        self.win = None
+
+    def create_window(self, bg_color: str = "gray80"):
+        if not self.window_created or self.win.isClosed():
+            self.win = GraphWin("CARLO", self.display_width, self.display_height)
+            self.win.setBackground(bg_color)
+            self.window_created = True
+            self.visualized_imgs = []
+
+    def update_agents(self, agents: list, correct_pos: list=None, next_pos: list=None):
+        new_visualized_imgs = []
+
+        # Remove the movable agents from the window
+        for imgItem in self.visualized_imgs:
+            if
imgItem["movable"]: + imgItem["graphics"].undraw() + else: + new_visualized_imgs.append({"movable": False, "graphics": imgItem["graphics"]}) + + # Add the updated movable agents (and the unmovable ones if they were not rendered before) + for agent in agents: + if isinstance(agent, TextEntity): + img = Text( + Point( + self.ppm * agent.center.x, + self.display_height - self.ppm * agent.center.y, + ), + agent.text, + ) + img.setSize(15) + img.draw(self.win) + # TODO(allanz): Hack: set movable=True so text is erased each iteration. + new_visualized_imgs.append({"movable": True, "graphics": img}) + elif agent.movable or not self.visualized_imgs: + if isinstance(agent, RectangleEntity): + C = [self.ppm * c for c in agent.corners] + img = Polygon([Point(c.x, self.display_height - c.y) for c in C]) + + # arrow + if agent.movable and (correct_pos and next_pos): + start = Point(self.ppm * agent.center.x, self.display_height - self.ppm * agent.center.y) + end = Point(self.ppm * correct_pos[0], self.display_height - self.ppm * correct_pos[1]) + #print("ACTION??: ", correct_pos) + line = Line(start, end) + line.setArrow("last") + line.draw(self.win) + new_visualized_imgs.append({"movable": agent.movable, "graphics": line}) + elif isinstance(agent, CircleEntity): + img = Circle( + Point( + self.ppm * agent.center.x, + self.display_height - self.ppm * agent.center.y, + ), + self.ppm * agent.radius, + ) + else: + raise NotImplementedError + img.setFill(agent.color) + img.draw(self.win) + new_visualized_imgs.append({"movable": agent.movable, "graphics": img}) + + self.visualized_imgs = new_visualized_imgs + + def close(self): + self.window_created = False + self.win.close() + self.visualized_imgs = [] diff --git a/imperfect_envs/driving/world.py b/imperfect_envs/driving/world.py new file mode 100644 index 0000000..2757471 --- /dev/null +++ b/imperfect_envs/driving/world.py @@ -0,0 +1,83 @@ +from typing import Union +import numpy as np +from driving.agents import Car, Pedestrian, Building +from driving.entities import Entity +from driving.visualizer import Visualizer + + +class World: + def __init__(self, dt: float, width: float, height: float, ppm: float = 8): + self.dynamic_agents = [] + self.static_agents = [] + self.t = 0 # simulation time + self.dt = dt # simulation time step + self.visualizer = Visualizer(width, height, ppm=ppm) + + def add(self, entity: Entity): + if entity.movable: + self.dynamic_agents.append(entity) + else: + self.static_agents.append(entity) + + def tick(self): + for agent in self.dynamic_agents: + agent.tick(self.dt) + self.t += self.dt + + def render(self, correct_pos=None, next_pos=None): + self.visualizer.create_window(bg_color="gray") + self.visualizer.update_agents(self.agents, correct_pos, next_pos) + + @property + def state(self): + return np.concatenate([agent.state for agent in self.dynamic_agents]) + + @state.setter + def state(self, x): + num_agents = len(self.dynamic_agents) + assert x.shape[0] % num_agents == 0 + agent_state_length = int(x.shape[0] / num_agents) + offset = 0 + for agent in self.dynamic_agents: + agent_new_state = x[offset : offset + agent_state_length] + agent.state = agent_new_state + offset += agent_state_length + + @property + def agents(self): + return self.static_agents + self.dynamic_agents + + def collision_exists(self, agent=None): + if agent is None: + for i in range(len(self.dynamic_agents)): + for j in range(i + 1, len(self.dynamic_agents)): + if self.dynamic_agents[i].collidable and self.dynamic_agents[j].collidable: + if 
self.dynamic_agents[i].collidesWith(self.dynamic_agents[j]): + return True + for j in range(len(self.static_agents)): + if self.dynamic_agents[i].collidable and self.static_agents[j].collidable: + if self.dynamic_agents[i].collidesWith(self.static_agents[j]): + return True + return False + + if not agent.collidable: + return False + + for i in range(len(self.agents)): + if ( + self.agents[i] is not agent + and self.agents[i].collidable + and agent.collidesWith(self.agents[i]) + ): + return True + return False + + def close(self): + self.reset() + self.static_agents = [] + self.visualizer.close() + + def reset(self): + self.dynamic_agents = [] + self.static_agents = [] + self.t = 0 diff --git a/imperfect_envs/imperfect.egg-info/PKG-INFO b/imperfect_envs/imperfect.egg-info/PKG-INFO new file mode 100644 index 0000000..75eeb17 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 2.1 +Name: imperfect +Version: 0.0.1 +Summary: UNKNOWN +Home-page: UNKNOWN +License: UNKNOWN +Platform: UNKNOWN + +UNKNOWN + diff --git a/imperfect_envs/imperfect.egg-info/SOURCES.txt b/imperfect_envs/imperfect.egg-info/SOURCES.txt new file mode 100644 index 0000000..1a9d17b --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/SOURCES.txt @@ -0,0 +1,7 @@ +README.md +setup.py +imperfect.egg-info/PKG-INFO +imperfect.egg-info/SOURCES.txt +imperfect.egg-info/dependency_links.txt +imperfect.egg-info/requires.txt +imperfect.egg-info/top_level.txt \ No newline at end of file diff --git a/imperfect_envs/imperfect.egg-info/dependency_links.txt b/imperfect_envs/imperfect.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/imperfect_envs/imperfect.egg-info/requires.txt b/imperfect_envs/imperfect.egg-info/requires.txt new file mode 100644 index 0000000..01ef558 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/requires.txt @@ -0,0 +1,3 @@ +gym +numpy +reacher diff --git a/imperfect_envs/imperfect.egg-info/top_level.txt b/imperfect_envs/imperfect.egg-info/top_level.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/imperfect_envs/imperfect.egg-info/top_level.txt @@ -0,0 +1 @@ + diff --git a/imperfect_envs/reacher/__init__.py b/imperfect_envs/reacher/__init__.py new file mode 100644 index 0000000..4f42dff --- /dev/null +++ b/imperfect_envs/reacher/__init__.py @@ -0,0 +1,35 @@ +from gym.envs.registration import register + +register( + id='reacher_custom-v0', + entry_point='reacher.envs:ReacherCustomEnv', + max_episode_steps=50, + reward_threshold=-3.75, +) + +register( + id='reacher_custom-action1-v0', + entry_point='reacher.envs:ReacherCustomAction1Env', + max_episode_steps=50, + reward_threshold=-3.75, +) + +register( + id='reacher_custom-action2-v0', + entry_point='reacher.envs:ReacherCustomAction2Env', + max_episode_steps=50, + reward_threshold=-3.75, +) +register( + id='reacher_custom-raction1-v0', + entry_point='reacher.envs:ReacherCustomRAction1Env', + max_episode_steps=50, + reward_threshold=-3.75, +) + +register( + id='reacher_custom-raction2-v0', + entry_point='reacher.envs:ReacherCustomRAction2Env', + max_episode_steps=50, + reward_threshold=-3.75, +) diff --git a/imperfect_envs/reacher/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/reacher/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..f82214d Binary files /dev/null and b/imperfect_envs/reacher/__pycache__/__init__.cpython-310.pyc differ diff --git 
a/imperfect_envs/reacher/envs/__init__.py b/imperfect_envs/reacher/envs/__init__.py new file mode 100644 index 0000000..8ab022f --- /dev/null +++ b/imperfect_envs/reacher/envs/__init__.py @@ -0,0 +1,2 @@ +from reacher.envs.reacher import ReacherCustomEnv +from reacher.envs.reacher import ReacherCustomAction1Env, ReacherCustomAction2Env, ReacherCustomRAction1Env, ReacherCustomRAction2Env diff --git a/imperfect_envs/reacher/envs/__pycache__/__init__.cpython-310.pyc b/imperfect_envs/reacher/envs/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..f95ddcb Binary files /dev/null and b/imperfect_envs/reacher/envs/__pycache__/__init__.cpython-310.pyc differ diff --git a/imperfect_envs/reacher/envs/__pycache__/reacher.cpython-310.pyc b/imperfect_envs/reacher/envs/__pycache__/reacher.cpython-310.pyc new file mode 100644 index 0000000..2a15908 Binary files /dev/null and b/imperfect_envs/reacher/envs/__pycache__/reacher.cpython-310.pyc differ diff --git a/imperfect_envs/reacher/envs/assets/reacher.xml b/imperfect_envs/reacher/envs/assets/reacher.xml new file mode 100644 index 0000000..be97198 --- /dev/null +++ b/imperfect_envs/reacher/envs/assets/reacher.xml @@ -0,0 +1,39 @@ + + + + + + + diff --git a/imperfect_envs/reacher/envs/assets/reacher_action1.xml b/imperfect_envs/reacher/envs/assets/reacher_action1.xml new file mode 100644 index 0000000..a86f5b4 --- /dev/null +++ b/imperfect_envs/reacher/envs/assets/reacher_action1.xml @@ -0,0 +1,39 @@ + + + + + + + diff --git a/imperfect_envs/reacher/envs/assets/reacher_action2.xml b/imperfect_envs/reacher/envs/assets/reacher_action2.xml new file mode 100644 index 0000000..a685385 --- /dev/null +++ b/imperfect_envs/reacher/envs/assets/reacher_action2.xml @@ -0,0 +1,39 @@ + + + + + + + diff --git a/imperfect_envs/reacher/envs/reacher.py b/imperfect_envs/reacher/envs/reacher.py new file mode 100644 index 0000000..9ba0567 --- /dev/null +++ b/imperfect_envs/reacher/envs/reacher.py @@ -0,0 +1,97 @@ +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env +import gym +from gym import spaces +import os +import random + +class ReacherCustomEnv(mujoco_env.MuJocoPyEnv, utils.EzPickle): + metadata = {'render_modes': ['human', 'rgb_array', 'depth_array'], 'render_fps': 50} + def __init__(self, config_file='reacher.xml', **kwargs): + dir_path = os.path.dirname(os.path.realpath(__file__)) + utils.EzPickle.__init__(self) + # print("fullpath is here ", self.fullpath) + # self._initialize_simulation() + self.observation_space = spaces.Box(low = -np.inf, high = np.inf, shape=(11,), dtype=np.float32) + # self.action_space = spaces.Box(low = -np.inf, high = np.inf, shape=(2,), dtype=np.float32) + mujoco_env.MuJocoPyEnv.__init__(self, ('%s/assets/'+config_file) % dir_path, 2, self.observation_space, **kwargs) + + def step(self, a): + vec = self.get_body_com("fingertip")-self.get_body_com("target") + reward_dist = - np.linalg.norm(vec) + reward_ctrl = - np.square(a).sum() + reward = reward_dist + reward_ctrl + self.do_simulation(a, self.frame_skip) + ob = self._get_obs() + done = False + reward_for_eval = reward_dist * 10# - np.sqrt(self.sim.data.qvel.flat[0]**2+self.sim.data.qvel.flat[1]**2) / 20. 
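+        # gym >= 0.26 step contract: (obs, reward, terminated, truncated, info);
+        # truncated stays False here since episode length is capped by the
+        # max_episode_steps=50 set at registration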
+
+        return ob, reward, done, False, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl, reward_eval=reward_for_eval)
+
+    def viewer_setup(self):
+        self.viewer.cam.trackbodyid = 0
+
+    def reset_with_obs(self, obs):
+        self.sim.reset()
+        qpos = np.array([0., 0., 0., 0.])
+        self.goal = obs[4:6]
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        qvel[-2:] = 0
+        qvel[0:2] = obs[6:8]
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def reset_model(self):
+        #self.close_goal = False
+        #qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
+        #while True:
+        #    self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
+        #    if np.linalg.norm(self.goal) < 0.2:
+        #        break
+        qpos = np.array([0., 0., 0., 0.])
+        self.goal = np.concatenate([self.np_random.uniform(low=-.1, high=.1, size=1),
+                                    self.np_random.uniform(low=-.2, high=-.1, size=1) if self.np_random.uniform(low=0, high=1., size=1)[0] > 0.5 else self.np_random.uniform(low=.1, high=.2, size=1)])
+        qpos[-2:] = self.goal
+        qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
+        qvel[-2:] = 0
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def _get_obs(self):
+        theta = self.sim.data.qpos.flat[:2]
+        return np.concatenate([
+            np.cos(theta),
+            np.sin(theta),
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
+            self.get_body_com("fingertip") - self.get_body_com("target")
+        ])
+
+class ReacherCustomAction1Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomAction1Env, self).__init__('reacher_action1.xml', **kwargs)
+
+class ReacherCustomRAction1Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomRAction1Env, self).__init__('reacher_action1.xml', **kwargs)
+        self.action_space = gym.spaces.Box(low=np.array([-1., -1.]).astype('float32'), high=np.array([0., 0.]).astype('float32'))
+
+    def step(self, a):
+        a = np.clip(a, -1., 0.)
+        return super(ReacherCustomRAction1Env, self).step(a)
+
+class ReacherCustomAction2Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomAction2Env, self).__init__('reacher_action2.xml', **kwargs)
+
+class ReacherCustomRAction2Env(ReacherCustomEnv):
+    def __init__(self, **kwargs):
+        super(ReacherCustomRAction2Env, self).__init__('reacher_action2.xml', **kwargs)
+        self.action_space = gym.spaces.Box(low=np.array([0., 0.]).astype('float32'), high=np.array([1., 1.]).astype('float32'))
+
+    def step(self, a):
+        a = np.clip(a, 0., 1.)
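+        # out-of-range commands are clamped to the env's [0, 1] action box before the shared dynamics run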
+ return super(ReacherCustomRAction2Env, self).step(a) + diff --git a/imperfect_envs/setup.py b/imperfect_envs/setup.py new file mode 100644 index 0000000..b25667d --- /dev/null +++ b/imperfect_envs/setup.py @@ -0,0 +1,6 @@ +from setuptools import setup + +setup(name='imperfect', + version='0.0.1', + install_requires=['gym', 'numpy', 'reacher'] # And any other dependencies foo needs +) diff --git a/simulate_data.py b/simulate_data.py new file mode 100644 index 0000000..5ae9390 --- /dev/null +++ b/simulate_data.py @@ -0,0 +1,122 @@ +import gym +import gym.wrappers +import reacher +import driving +import time +from gym import make +import numpy as np +import argparse +import pickle +parser = argparse.ArgumentParser(description='Test the model') +parser.add_argument('--num-episodes', type=int, default=10) +parser.add_argument('--seed', type=int, default=1001) +parser.add_argument('--use-sleep', action='store_true') +parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0') +parser.add_argument('--data_path', type=str, default='new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl') +parser.add_argument('--render', action='store_true') +parser.add_argument('--goalx', type=int, default=15) +parser.add_argument('--goaly', type=int, default=38) + +paths = {'optimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl', + 'suboptimalfast1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_10_38_0.pkl', + 'optimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', + 'suboptimalfast1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', + 'optimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl', + 'suboptimalfast2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousFastRandom-v0/suboptimal_data_ContinuousFastRandom-v0_0_20_38_0.pkl', + 'optimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl', + 'suboptimalslow1038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_10_38_0.pkl', + 'optimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl', + 'suboptimalslow1538' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_15_38_0.pkl', + 'optimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/optimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl', + 'suboptimalslow2038' : '/home/smart/PPO-PyTorch/new_data/ContinuousSlowRandom-v0/suboptimal_data_ContinuousSlowRandom-v0_0_20_38_0.pkl'} +args = parser.parse_args() +with open(args.data_path, 'rb') as f: + episodes = pickle.load(f) +print(len(episodes)) +print(episodes[1]['reward']) +args = parser.parse_args() + + +env1 = gym.make(args.env) +num_inputs = env1.observation_space.shape[0] +num_actions = env1.action_space.shape[0] +print(num_inputs, num_actions) +for i in range(args.num_episodes): + accumulator = 0 + accumulator2 = 0 + initial_state = episodes[i]['state'][0] + env1.reset(goal = initial_state[7:9]) + print('goal location', initial_state[7:9]) + env1.reset_with_obs(initial_state) + # print("initial state ", initial_state) + 
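+    # the restored observation must match the logged one exactly before replaying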
# print("initial state as per the model ", env1.get_obs()) + assert np.allclose(env1.get_obs(), initial_state) + if args.render: + env1.render() + for step in range(len(episodes[i]['reward'])): + action = episodes[i]['action'][step] + print(action.dtype) + next_state, reward, done, _, info= env1.step(action) + print("next state as per the model ", next_state) + print("next state as per the data ", episodes[i]['state'][step+1]) + try: + assert np.allclose(next_state, episodes[i]['state'][step+1]) + except: + print("the two states are not the same ", next_state, episodes[i]['state'][step+1]) + accumulator += episodes[i]['reward'][step] + accumulator2 += reward + if args.render: + env1.render() + if args.use_sleep: + time.sleep(0.05) + time.sleep(0.1) + print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2)) +env1.close() + +# with open(paths['optimalfast1538'], 'rb') as f: +# episodes = pickle.load(f) +# print(len(episodes)) +# print(episodes[1]['reward']) +# environment = args.env +# env1 = gym.make(environment) +# # env1.set_goal(args.goalx, args.goaly) +# # env1.reset(goal = [args.goalx, args.goaly]) +# # env1.set_goal(args.goalx, args.goaly) +# num_inputs = env1.observation_space.shape[0] +# num_actions = env1.action_space.shape[0] +# print(num_inputs, num_actions) +# for i in range(args.num_episodes): +# accumulator = 0 +# accumulator2 = 0 +# initial_state = episodes[i]['state'][0] +# env1.reset(goal = initial_state[7:9]) +# print('goal location', initial_state[7:9]) +# # env1.set_goal(initial_state[7], initial_state[8]) +# env1.reset_with_obs(initial_state) +# if args.render: +# env1.render() +# for step in range(len(episodes[i]['reward'])): +# # accumulator += episodes[i]['reward'][step] +# action = episodes[i]['action'][step] +# # print(episodes[i]['reward'][step]) +# next_state, reward, done, _, info= env1.step(action) +# # print("next state as per the model ", next_state) +# # print("next state as per the data ", episodes[i]['state'][step+1]) +# # env1.reset_with_obs(episodes[i]['state'][step+1]) +# accumulator += episodes[i]['reward'][step] +# accumulator2 += reward +# if args.render: +# env1.render() +# if args.use_sleep: +# time.sleep(0.05) +# time.sleep(0.1) +# print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2)) +# env1.close() + + +# import pickle as pkl +# with open('new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', 'rb') as f: +# data = pkl.load(f) +# print(len(data[0]['state'])) +# print((data[0]['state'][0])) +# print(len(data[0]['state'][0])) diff --git a/test.py b/test.py index e23cbdf..83487bf 100644 --- a/test.py +++ b/test.py @@ -5,10 +5,12 @@ import torch import numpy as np - +import time import gym -import roboschool - +import pickle +# import roboschool +import driving +import argparse from PPO import PPO @@ -17,42 +19,60 @@ def test(): print("============================================================================================") ################## hyperparameters ################## - - # env_name = "CartPole-v1" + parser = argparse.ArgumentParser(description='Test the model') + parser.add_argument('--seed', type=int, default=1001) + parser.add_argument('--goalx', type=int, default=15) + parser.add_argument('--goaly', type=int, default=38) + parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0') + parser.add_argument('--render', action='store_true') + parser.add_argument('--use-sleep', action='store_true') + 
parser.add_argument('--optimal', action='store_true') + parser.add_argument('--suboptimal', action='store_true') + parser.add_argument('--num_episodes', type=int, default=10) + parser.add_argument('--max_num_samples', type=int, default=1000) + parser.add_argument('--threshold', type=float, default=0) + parser.add_argument('--dontsave', action='store_true') + args = parser.parse_args() + # args.env = "CartPole-v1" # has_continuous_action_space = False # max_ep_len = 400 # action_std = None - # env_name = "LunarLander-v2" + # args.env = "LunarLander-v2" # has_continuous_action_space = False # max_ep_len = 300 # action_std = None - # env_name = "BipedalWalker-v2" + # args.env = "BipedalWalker-v2" # has_continuous_action_space = True # max_ep_len = 1500 # max timesteps in one episode # action_std = 0.1 # set same std for action distribution which was used while saving - env_name = "RoboschoolWalker2d-v1" has_continuous_action_space = True max_ep_len = 1000 # max timesteps in one episode action_std = 0.1 # set same std for action distribution which was used while saving - render = True # render environment on screen + # delay = True # add delay b/w frames to make video like real time + # render = True # render environment on screen frame_delay = 0 # if required; add delay b/w frames - total_test_episodes = 10 # total num of testing episodes + # total_test_episodes = 10 # total num of testing episodes K_epochs = 80 # update policy for K epochs eps_clip = 0.2 # clip parameter for PPO gamma = 0.99 # discount factor lr_actor = 0.0003 # learning rate for actor - lr_critic = 0.001 # learning rate for critic - + lr_critic = 0.001 + # learning rate for critic + run_best_model = False + if args.optimal: + run_best_model = True + # load and run the best saved model ##################################################### - env = gym.make(env_name) + env = gym.make(args.env) + env.set_goal(args.goalx, args.goaly) # state space dimension state_dim = env.observation_space.shape[0] @@ -71,8 +91,24 @@ def test(): random_seed = 0 #### set this to load a particular checkpoint trained on random seed run_num_pretrained = 0 #### set this to load a particular checkpoint num - directory = "PPO_preTrained" + '/' + env_name + '/' - checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, random_seed, run_num_pretrained) + directory = "PPO_preTrained" + '/' + args.env + '/' + data_directory = "new_data" + if not os.path.exists(directory): + print("No directory found") + exit() + if not os.path.exists(data_directory): + os.makedirs(data_directory) + data_directory = data_directory + '/' + args.env + '/' + if not os.path.exists(data_directory): + os.makedirs(data_directory) + + if not run_best_model: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + else: + checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}best.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + + optimal_data_path = data_directory + "optimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) + suboptimal_data_path = data_directory + "suboptimal_data_{}_{}_{}_{}_{}.pkl".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained) print("loading network from : " + checkpoint_path) ppo_agent.load(checkpoint_path) @@ -80,40 +116,104 @@ def test(): print("--------------------------------------------------------------------------------------------") test_running_reward = 0 - - for ep 
in range(1, total_test_episodes+1):
+    dataload = []
+    num_optimal = 1
+    num_suboptimal = 1
+    for ep in range(1, args.num_episodes+1):
         ep_reward = 0
         state = env.reset()
-
+        state_dict = {'state' : [], 'action': [], 'reward': [], 'optimal': []}
+        state_dict['state'].append(state)

         for t in range(1, max_ep_len+1):
             action = ppo_agent.select_action(state)
-            state, reward, done, _ = env.step(action)
+            # print(action.dtype)
+            state, reward, done, _, _ = env.step(action)
             ep_reward += reward
-
-            if render:
+            state_dict['state'].append(state)
+            state_dict['action'].append(action)
+            state_dict['reward'].append(reward)
+            if args.render:
                 env.render()
-                time.sleep(frame_delay)
-
+                # time.sleep(frame_delay)
+                if args.use_sleep:
+                    time.sleep(0.05)
             if done:
                 break

         # clear buffer
         ppo_agent.buffer.clear()
-
+        if num_optimal > args.max_num_samples and args.optimal and not args.suboptimal:
+            break
+        if num_suboptimal > args.max_num_samples and args.suboptimal and not args.optimal:
+            break
+        if num_optimal > args.max_num_samples and num_suboptimal > args.max_num_samples and args.optimal and args.suboptimal:
+            break
+        if ep_reward > args.threshold and args.optimal:
+            state_dict['optimal'] = [True] * len(state_dict['action'])
+            num_optimal += 1
+            dataload.append(state_dict)
+        if ep_reward <= args.threshold and args.suboptimal:
+            state_dict['optimal'] = [False] * len(state_dict['action'])
+            num_suboptimal += 1
+            dataload.append(state_dict)
         test_running_reward += ep_reward
         print('Episode: {} \t\t Reward: {}'.format(ep, round(ep_reward, 2)))
+        ep_reward = 0

     env.close()

     print("============================================================================================")

-    avg_test_reward = test_running_reward / total_test_episodes
+    avg_test_reward = test_running_reward / args.num_episodes
     avg_test_reward = round(avg_test_reward, 2)
     print("average test reward : " + str(avg_test_reward))
-
+    if not args.dontsave and args.optimal:
+        with open(optimal_data_path, 'wb') as file:
+            pickle.dump(dataload, file)
+        print("optimal data saved at : " + optimal_data_path + " with " + str(num_optimal) + " samples")
+    if not args.dontsave and args.suboptimal:
+        with open(suboptimal_data_path, 'wb') as file:
+            pickle.dump(dataload, file)
+        print("suboptimal data saved at : " + suboptimal_data_path + " with " + str(num_suboptimal) + " samples")
     print("============================================================================================")
+    print("length of dataload ", len(dataload))
+    print("fields of dataload ", dataload[0].keys())
+    for i in range(args.num_episodes):
+        accumulator = 0
+        accumulator2 = 0
+        initial_state = dataload[i]['state'][0]
+        env.reset(goal = initial_state[7:9])
+        print('goal location', initial_state[7:9])
+        env.reset_with_obs(initial_state)
+        # print("initial state ", initial_state)
+        # print("initial state as per the model ", env.get_obs())
+        assert np.allclose(env.get_obs(), initial_state)
+        if args.render:
+            env.render()
+        for step in range(len(dataload[i]['reward'])):
+            action = dataload[i]['action'][step]
+            # print(action.dtype)
+            next_state, reward, done, _, info = env.step(action)
+            # print("next state as per the model ", next_state)
+            # print("next state as per the data ", dataload[i]['state'][step+1])
+            try:
+                assert np.allclose(next_state, dataload[i]['state'][step+1])
+            except AssertionError:
+                print("the two states are not the same ", next_state, dataload[i]['state'][step+1])
+            accumulator += dataload[i]['reward'][step]
+            accumulator2 += reward
+            if args.render:
+                env.render()
+            if args.use_sleep:
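+                # short pause so rendered replay runs at roughly real time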
+                time.sleep(0.05)
+        time.sleep(0.1)
+        print("episode {} done : reward {}, actual reward {} ".format(i, accumulator, accumulator2))
+    env.close()
+
+
+
 if __name__ == '__main__':
diff --git a/train.py b/train.py
index 4f06358..b36f36f 100644
--- a/train.py
+++ b/train.py
@@ -7,16 +7,26 @@
 import numpy as np

 import gym
-import roboschool
+import driving
+# import roboschool
+import argparse

 from PPO import PPO

+
 ################################### Training ###################################
 def train():
     print("============================================================================================")

     ####### initialize environment hyperparameters ######
-    env_name = "RoboschoolWalker2d-v1"
+    parser = argparse.ArgumentParser(description='Train the model')
+    parser.add_argument('--seed', type=int, default=1001)
+    parser.add_argument('--goalx', type=int, default=15)
+    parser.add_argument('--goaly', type=int, default=38)
+    parser.add_argument('--env', type=str, default='ContinuousFastRandom-v0')
+    parser.add_argument('--render', action='store_true')
+    parser.add_argument('--use-sleep', action='store_true')
+    args = parser.parse_args()

     has_continuous_action_space = True  # continuous action space; else discrete

@@ -48,12 +58,19 @@
     random_seed = 0         # set random seed if required (0 = no random seed)
     #####################################################

-    print("training environment name : " + env_name)
-
-    env = gym.make(env_name)
+    print("training environment name : " + args.env)
+    env = gym.make(args.env)
+    env.set_goal(args.goalx, args.goaly)
+    print(env.goal)
+    for i in range(100):
+        env.reset()
+        env.render()
+        time.sleep(0.01)
+    env.close()

     # state space dimension
     state_dim = env.observation_space.shape[0]
+    print(state_dim)

     # action space dimension
     if has_continuous_action_space:
@@ -68,7 +85,7 @@
     if not os.path.exists(log_dir):
         os.makedirs(log_dir)

-    log_dir = log_dir + '/' + env_name + '/'
+    log_dir = log_dir + '/' + args.env + '/'
     if not os.path.exists(log_dir):
         os.makedirs(log_dir)

@@ -78,26 +95,28 @@
     run_num = len(current_num_files)

     #### create new log file for each run
-    log_f_name = log_dir + '/PPO_' + env_name + "_log_" + str(run_num) + ".csv"
+    log_f_name = log_dir + '/PPO_' + args.env + "_log_" + str(run_num) + ".csv"

-    print("current logging run number for " + env_name + " : ", run_num)
+    print("current logging run number for " + args.env + " : ", run_num)
     print("logging at : " + log_f_name)
     #####################################################

     ################### checkpointing ###################
-    run_num_pretrained = 0      #### change this to prevent overwriting weights in same env_name folder
+    run_num_pretrained = 0      #### change this to prevent overwriting weights in same args.env folder

     directory = "PPO_preTrained"
     if not os.path.exists(directory):
         os.makedirs(directory)

-    directory = directory + '/' + env_name + '/'
+    directory = directory + '/' + args.env + '/'
     if not os.path.exists(directory):
         os.makedirs(directory)

-    checkpoint_path = directory + "PPO_{}_{}_{}.pth".format(env_name, random_seed, run_num_pretrained)
+    checkpoint_path = directory + "PPO_{}_{}_{}_{}_{}.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained)
+    best_model_path = directory + "PPO_{}_{}_{}_{}_{}best.pth".format(args.env, random_seed, args.goalx, args.goaly, run_num_pretrained)
     print("save checkpoint path : " + checkpoint_path)
+    print("save best model path : " + best_model_path)
     #####################################################

@@
-163,7 +182,7 @@ def train(): time_step = 0 i_episode = 0 - + max_reward = -np.inf # training loop while time_step <= max_training_timesteps: @@ -173,8 +192,10 @@ def train(): for t in range(1, max_ep_len+1): # select action with policy + if args.render: + env.render() action = ppo_agent.select_action(state) - state, reward, done, _ = env.step(action) + state, reward, done, _ , _= env.step(action) # saving reward and is_terminals ppo_agent.buffer.rewards.append(reward) @@ -220,6 +241,10 @@ def train(): if time_step % save_model_freq == 0: print("--------------------------------------------------------------------------------------------") print("saving model at : " + checkpoint_path) + if print_avg_reward > max_reward: + max_reward = print_avg_reward + ppo_agent.save(best_model_path) + print("best model saved with reward as ", max_reward) ppo_agent.save(checkpoint_path) print("model saved") print("Elapsed Time : ", datetime.now().replace(microsecond=0) - start_time) @@ -228,6 +253,8 @@ def train(): # break; if the episode is over if done: break + if args.use_sleep: + time.sleep(0.01) print_running_reward += current_ep_reward print_running_episodes += 1 @@ -243,6 +270,7 @@ def train(): # print total training time print("============================================================================================") end_time = datetime.now().replace(microsecond=0) + print("model with best average reward ", max_reward) print("Started training at (GMT) : ", start_time) print("Finished training at (GMT) : ", end_time) print("Total training time : ", end_time - start_time) diff --git a/train_load.sh b/train_load.sh new file mode 100755 index 0000000..02af589 --- /dev/null +++ b/train_load.sh @@ -0,0 +1,18 @@ +# python3 train.py --goalx 15 --goaly 38 --env ContinuousSlowRandom-v0 +# python3 train.py --goalx 10 --goaly 38 --env ContinuousSlowRandom-v0 +# python3 train.py --goalx 20 --goaly 38 --env ContinuousSlowRandom-v0 +# # python3 train.py --goalx 15 --goaly 38 --env ContinuousFastRandom-v0 +# # python3 train.py --goalx 10 --goaly 38 --env ContinuousFastRandom-v0 +# # python3 train.py --goalx 20 --goaly 38 --env ContinuousFastRandom-v0 +# python3 test.py --goalx 15 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal --threshold -1000 +# python3 test.py --goalx 10 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal --threshold -1000 +# python3 test.py --goalx 20 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal --threshold -1000 +# python3 test.py --goalx 15 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal +# python3 test.py --goalx 10 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal +# python3 test.py --goalx 20 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --optimal +python3 test.py --goalx 15 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal --threshold -1000 +# python3 test.py --goalx 10 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal --threshold -1000 +python3 test.py --goalx 20 --goaly 38 --env ContinuousSlowRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal --threshold -1000 +# python3 test.py --goalx 15 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal +# python3 test.py --goalx 10 
--goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal +# python3 test.py --goalx 20 --goaly 38 --env ContinuousFastRandom-v0 --num_episodes 2000 --max_num_samples 1000 --suboptimal
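
For reference, a minimal sketch of loading and replaying one of the datasets produced by the test.py changes above. It assumes the list-of-dicts layout test.py pickles (each episode dict holding 'state', 'action', 'reward', 'optimal') and the driving envs' custom reset(goal=...) and reset_with_obs() helpers; the pickle path is illustrative.

import pickle

import gym
import numpy as np

import driving  # registers the Continuous*Random-v0 environments

# illustrative path; substitute any file written by test.py
with open('new_data/ContinuousFastRandom-v0/optimal_data_ContinuousFastRandom-v0_0_15_38_0.pkl', 'rb') as f:
    episodes = pickle.load(f)

env = gym.make('ContinuousFastRandom-v0')
ep = episodes[0]

# goal coordinates sit at indices 7:9 of the observation in this layout
env.reset(goal=ep['state'][0][7:9])
env.reset_with_obs(ep['state'][0])

replayed_return = 0.0
for t, action in enumerate(ep['action']):
    obs, reward, done, truncated, info = env.step(action)
    # a deterministic env should retrace the logged trajectory exactly
    assert np.allclose(obs, ep['state'][t + 1])
    replayed_return += reward

print('logged return:', sum(ep['reward']), 'replayed return:', replayed_return)
env.close()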