-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimple.py
More file actions
37 lines (30 loc) · 1.63 KB
/
simple.py
File metadata and controls
37 lines (30 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import gymnasium as gym
import panda_gym
# Minimal random-action rollout on a goal-conditioned panda-gym task.
# PandaPush-v3 is used because its reward function is sparse, which makes it
# a convenient environment for demonstrating HER.
env = gym.make("PandaPush-v3")
try:
    observation, info = env.reset()

    print(env.observation_space)
    # Dict('achieved_goal': Box(-10.0, 10.0, (3,), float32),
    #      'desired_goal': Box(-10.0, 10.0, (3,), float32),
    #      'observation': Box(-10.0, 10.0, (18,), float32))
    print(env.action_space)
    # Box(-1.0, 1.0, (3,), float32)

    for _ in range(10):
        # Random sample standing in for an agent policy that would use the
        # observation and info.
        action = env.action_space.sample()
        print(action)
        # Continuous action: one value per action dimension, each within the
        # action-space bounds printed above (-1 to 1).
        # NOTE(review): presumably these are end-effector displacements rather
        # than joint commands -- confirm against the panda-gym docs.
        # Example action: [ 0.72488785 -0.7759044 -0.5041433 ]

        observation, reward, terminated, truncated, info = env.step(action)
        print(observation, reward, terminated, truncated, info)
        # Example output:
        # {'observation': array([ 5.47938906e-02, 6.45167893e-03, 1.77058756e-01, -1.48582861e-01,
        #   6.28416166e-02, -1.37010586e+00, -2.38760710e-02, 2.47766189e-02,
        #   1.99895296e-02, 4.48686751e-06, -3.08392118e-05, -3.87872387e-06,
        #  -5.73867237e-06, 1.09538505e-05, -5.08076300e-06, 5.10965457e-08,
        #  -2.53998151e-04, -9.67000524e-05], dtype=float32),
        #  'achieved_goal': array([-0.02387607, 0.02477662, 0.01998953], dtype=float32),
        #  'desired_goal': array([-0.05123958, 0.13161023, 0.02 ], dtype=float32)}
        # reward: -1.0
        # terminated: False
        # truncated: False
        # info: {'is_success': False}

        # Recompute the reward directly from the achieved and desired goals.
        # gym.make() returns the environment inside wrappers, and newer
        # Gymnasium releases no longer forward unknown attributes through
        # wrappers, so compute_reward is looked up on the unwrapped env.
        print(
            env.unwrapped.compute_reward(
                observation["achieved_goal"],
                observation["desired_goal"],
                {},  # no extra info is supplied
            )
        )
        # Example output: -1.0 (same value as the reward returned by step()
        # in the sample above)

        # Start a new episode once the current one has ended.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Release the environment even if a step above raised.
    env.close()