Manipulator Agnostic Gripper Control for Reinforcement Learning #2691
-
IntroHi! I am a Ph.d student and am using MuJoCo (MJX) for research in Deep Reinforcement Learning. I am unsure rather to post this question in the Brax repo or here, so sorry if this is misplaced :) My setupMy system setup is
My questionI am looking to train an RL algorithm to move and actuate a gripper for manipulation tasks. While i could attach the gripper to a manipulator like the Franka Panda or the UR, I ideally want to train a policy which produces changes in end-effector poses such that I can handle the manipulator control externally and have the policy be manipulator agnostic. I know that in MuJoCo there exists mocap objects which i can weld to the gripper with a free joint and then control the grippers pose using the mocap object. Using sliding joint actuators I also have to deal with actuator dynamics and their gains which is a non issue with the mocap object, and therefore at the moment i would prefer the mocap approach. Is it possible to use the MLP outputs to control the mocap pose without actuators? A minimal example, pseudo code or just some clarification would me immensely appreciated! :) Thanks in advance! Minimal model and/or code that explain my questionNo response Confirmations
|
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 1 reply
-
Why aren't you using a Cartesian controller? https://youtu.be/s-0JHanqV1A (link to model therein) |
Beta Was this translation helpful? Give feedback.
-
UpdateI have now tried this approach for a while and I have encountered some interesting behavior that seem to cause some problems. As seen in the video and code below, I am running the default 2f85 model from MuJoCo Menagerie along with a cube. I then add joints and actuators to control the Cartesian pose of the gripper while grasping the cube. When I move the gripper in x, y and z no issues occur, however when i change the orientation, it seems the gripper is lacking behind, or the cube is being pushed ahead of the motion. I have been trying to solve this with different approaches, and while the phenomenon is reduced when I have lower gains on the position actuators (Kp = 10 and Kv = 10), this introduces the problem of not tracking the reference particularly well. Any and all insights/help/solutions/comments are very much appreciated! Video of the phenomenonhuh-2025-08-05_09.59.24.online-video-cutter.com.1.1.mp4This is the code i used in the videoimport time
import glfw
import mujoco
import mujoco as mj
import mujoco.viewer
import numpy as np
from robot_descriptions import robotiq_2f85_mj_description
_XML = """
<mujoco model="empty scene">
<compiler angle="radian" autolimits="true" />
<option timestep="0.002"
integrator="implicitfast"
solver="Newton"
gravity="0 0 -9.82"
cone="elliptic"
sdf_iterations="5"
sdf_initpoints="30"
noslip_iterations="2"
>
<flag multiccd="enable" nativeccd="enable" />
</option>
<statistic center="0.3 0 0.3" extent="0.8" meansize="0.08" />
<visual>
<headlight diffuse="0.6 0.6 0.6" ambient="0.1 0.1 0.1" specular="0 0 0" />
<rgba haze="0.15 0.25 0.35 1" />
<global azimuth="120" elevation="-20" offwidth="2000" offheight="2000" />
</visual>
<asset>
<texture type="skybox" builtin="gradient" rgb1="0.3 0.5 0.7" rgb2="0 0 0" width="512"
height="3072" />
<texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.2 0.3 0.4"
rgb2="0.1 0.2 0.3" markrgb="0.8 0.8 0.8" width="300" height="300" />
<material name="groundplane" texture="groundplane" texuniform="true" texrepeat="5 5"
reflectance="0.2" />
</asset>
<worldbody>
<light pos="0 0 1.5" dir="0 0 -1" directional="true" />
<geom name="floor" size="0 0 0.5" type="plane" material="groundplane" solimp="0.0 0.0 0.0 0.0 1" />
</worldbody>
</mujoco>
"""
def add_act_freejoint(
spec: mj.MjSpec,
x_lim: tuple[float] = (-1, 1),
y_lim: tuple[float] = (-1, 1),
z_lim: tuple[float] = (-1, 1),
roll_lim: tuple[float] = (-np.pi, np.pi),
pitch_lim: tuple[float] = (-np.pi, np.pi),
yaw_lim: tuple[float] = (-np.pi, np.pi),
kp_pos: float = 1000,
kp_ori: float = 1000,
kv_pos: float = 1000,
kv_ori: float = 1000,
) -> mj.MjSpec:
base = spec.worldbody.first_body()
base.add_joint(name="x", axis=[1, 0, 0], type=mj.mjtJoint.mjJNT_SLIDE, range=x_lim)
base.add_joint(name="y", axis=[0, 1, 0], type=mj.mjtJoint.mjJNT_SLIDE, range=y_lim)
base.add_joint(name="z", axis=[0, 0, 1], type=mj.mjtJoint.mjJNT_SLIDE, range=z_lim)
base.add_joint(
name="roll", axis=[1, 0, 0], type=mj.mjtJoint.mjJNT_HINGE, range=roll_lim
)
base.add_joint(
name="pitch",
axis=[0, 1, 0],
type=mj.mjtJoint.mjJNT_HINGE,
range=pitch_lim,
)
base.add_joint(
name="yaw", axis=[0, 0, 1], type=mj.mjtJoint.mjJNT_HINGE, range=yaw_lim
)
spec.add_actuator(
name="x", target="x", trntype=mj.mjtTrn.mjTRN_JOINT, ctrlrange=x_lim
).set_to_position(kp=kp_pos, kv=kv_pos)
spec.add_actuator(
name="y", target="y", trntype=mj.mjtTrn.mjTRN_JOINT, ctrlrange=y_lim
).set_to_position(kp=kp_pos, kv=kv_pos)
spec.add_actuator(
name="z", target="z", trntype=mj.mjtTrn.mjTRN_JOINT, ctrlrange=z_lim
).set_to_position(kp=kp_pos, kv=kv_pos)
spec.add_actuator(
name="roll",
target="roll",
trntype=mj.mjtTrn.mjTRN_JOINT,
ctrlrange=roll_lim,
forcerange=[-10, 10],
).set_to_position(kp=kp_ori, kv=kv_ori)
spec.add_actuator(
name="pitch",
target="pitch",
trntype=mj.mjtTrn.mjTRN_JOINT,
ctrlrange=pitch_lim,
forcerange=[-10, 10],
).set_to_position(kp=kp_ori, kv=kv_ori)
spec.add_actuator(
name="yaw",
target="yaw",
trntype=mj.mjtTrn.mjTRN_JOINT,
ctrlrange=yaw_lim,
forcerange=[-10, 10],
).set_to_position(kp=kp_ori, kv=kv_ori)
return spec
class MjSim:
def __init__(self):
self._model, self._data = self.init()
def init(self):
gripper = mj.MjSpec.from_file(robotiq_2f85_mj_description.MJCF_PATH)
scene = mj.MjSpec.from_string(_XML)
K = 100
# add a "freejoint" which I can actuate
gripper = add_act_freejoint(gripper, kp_ori=K, kv_ori=K, kp_pos=K, kv_pos=K)
gripper.worldbody.first_body().gravcomp = 1
scene.worldbody.add_frame(
name=f"{gripper.modelname}_1",
pos=[0, 0, 0.25],
euler=np.array([np.pi, 0, 0]),
).attach_body(gripper.worldbody.first_body(), f"{gripper.modelname}_1/")
prop = scene.worldbody.add_body(name="prop")
prop.add_geom(
name="prop",
type=mj.mjtGeom.mjGEOM_BOX,
size=[0.018, 0.018, 0.018],
mass=0.3,
)
prop.add_freejoint()
m = scene.compile()
d = mj.MjData(m)
# step once to compute the poses of objects
mj.mj_step(m, d)
return m, d
def run(self):
with mujoco.viewer.launch_passive(
model=self.model,
data=self.data,
show_left_ui=False,
show_right_ui=True,
) as viewer:
while viewer.is_running():
step_start = time.time()
mj.mj_step(self.model, self.data)
viewer.sync()
# Rudimentary time keeping, will drift relative to wall clock.
time_until_next_step = self.model.opt.timestep - (
time.time() - step_start
)
if time_until_next_step > 0:
time.sleep(time_until_next_step)
@property
def data(self) -> mj.MjData:
return self._data
@property
def model(self) -> mj.MjModel:
return self._model
def keyboard_callback(self, key: int):
if key is glfw.KEY_SPACE:
print("You pressed space...")
if __name__ == "__main__":
sim = MjSim()
sim.run() Thanks in advance! :) |
Beta Was this translation helpful? Give feedback.
Why aren't you using a Cartesian controller?
https://youtu.be/s-0JHanqV1A (link to model therein)