[MISC] More robust detection of invalid simulation state. (#2112)

duburcqa · web-flow · commit 391f039953c7 · 2025-12-12T18:17:52.000+01:00
* Use a bitmask for the return code to ensure safe writes without atomic operations.
* More robust detection of invalid simulation state.
* Raise a clear exception when calling 'ti_to_python' on ti.(Vector|Matrix).
* More comprehensive unit test for apply_links_external_(force|torque).
diff --git a/genesis/engine/solvers/rigid/collider_decomp.py b/genesis/engine/solvers/rigid/collider_decomp.py
@@ -1451,7 +1451,7 @@ def func_broad_phase(
 
                         i_p = collider_state.n_broad_pairs[i_b]
                         if i_p == collider_info.max_collision_pairs_broad[None]:
-                            errno[None] = 1
+                            errno[None] = errno[None] | 0b00000000000000000000000000000001
                             break
                         collider_state.broad_collision_pairs[i_p, i_b][0] = i_ga
                         collider_state.broad_collision_pairs[i_p, i_b][1] = i_gb
@@ -2175,7 +2175,7 @@ def func_add_contact(
 
         collider_state.n_contacts[i_b] = i_c + 1
     else:
-        errno[None] = 2
+        errno[None] = errno[None] | 0b00000000000000000000000000000010
 
 
 @ti.func
diff --git a/genesis/engine/solvers/rigid/constraint_solver_decomp.py b/genesis/engine/solvers/rigid/constraint_solver_decomp.py
@@ -1416,7 +1416,7 @@ def func_update_qacc(
         dofs_state.force[i_d, i_b] = dofs_state.qf_smooth[i_d, i_b] + constraint_state.qfrc_constraint[i_d, i_b]
         constraint_state.qacc_ws[i_d, i_b] = constraint_state.qacc[i_d, i_b]
         if ti.math.isnan(constraint_state.qacc[i_d, i_b]):
-            errno[None] = 3
+            errno[None] = errno[None] | 0b00000000000000000000000000000100
 
 
 @ti.kernel(fastcache=gs.use_fastcache)
diff --git a/genesis/engine/solvers/rigid/rigid_solver_decomp.py b/genesis/engine/solvers/rigid/rigid_solver_decomp.py
@@ -905,6 +905,7 @@ def substep(self, f):
                 rigid_global_info=self._rigid_global_info,
                 static_rigid_sim_config=self._static_rigid_sim_config,
                 contact_island_state=self.constraint_solver.contact_island.contact_island_state,
+                errno=self._errno,
             )
             if self._requires_grad:
                 kernel_save_adjoint_cache(
@@ -916,27 +917,27 @@ def substep(self, f):
                 )
 
     def check_errno(self):
-        # Note that errno must be evaluated BEFORE match because otherwise it will be evaluated for each case...
-        # See official documentation: https://docs.python.org/3.10/reference/compound_stmts.html#overview
         if gs.use_zerocopy:
             errno = ti_to_torch(self._errno, copy=None).item()
         else:
             errno = kernel_get_errno(self._errno)
-        match errno:
-            case 1:
-                max_collision_pairs_broad = self.collider._collider_info.max_collision_pairs_broad[None]
-                gs.raise_exception(
-                    f"Exceeding max number of broad phase candidate contact pairs ({max_collision_pairs_broad}). "
-                    f"Please increase the value of RigidSolver's option 'multiplier_collision_broad_phase'."
-                )
-            case 2:
-                max_contact_pairs = self.collider._collider_info.max_contact_pairs[None]
-                gs.raise_exception(
-                    f"Exceeding max number of contact pairs ({max_contact_pairs}). Please increase the value of "
-                    "RigidSolver's option 'max_collision_pairs'."
-                )
-            case 3:
-                gs.raise_exception("Invalid accelerations causing 'nan'. Please decrease Rigid simulation timestep.")
+
+        if errno & 0b00000000000000000000000000000001:
+            max_collision_pairs_broad = self.collider._collider_info.max_collision_pairs_broad[None]
+            gs.raise_exception(
+                f"Exceeding max number of broad phase candidate contact pairs ({max_collision_pairs_broad}). "
+                f"Please increase the value of RigidSolver's option 'multiplier_collision_broad_phase'."
+            )
+        if errno & 0b00000000000000000000000000000010:
+            max_contact_pairs = self.collider._collider_info.max_contact_pairs[None]
+            gs.raise_exception(
+                f"Exceeding max number of contact pairs ({max_contact_pairs}). Please increase the value of "
+                "RigidSolver's option 'max_collision_pairs'."
+            )
+        if errno & 0b00000000000000000000000000000100:
+            gs.raise_exception("Invalid constraint forces causing 'nan'. Please decrease Rigid simulation timestep.")
+        if errno & 0b00000000000000000000000000001000:
+            gs.raise_exception("Invalid accelerations causing 'nan'. Please decrease Rigid simulation timestep.")
 
     def _kernel_detect_collision(self):
         self.collider.reset(cache_only=True)
@@ -1241,6 +1242,7 @@ def substep_pre_coupling_grad(self, f):
             rigid_global_info=self._rigid_global_info,
             static_rigid_sim_config=self._static_rigid_sim_config,
             contact_island_state=self.constraint_solver.contact_island.contact_island_state,
+            errno=self._errno,
         )
 
         # We cannot use [kernel_forward_dynamics.grad] because we read [dofs_state.acc] and overwrite it in the kernel,
@@ -1336,6 +1338,7 @@ def substep_post_coupling(self, f):
                 rigid_global_info=self._rigid_global_info,
                 static_rigid_sim_config=self._static_rigid_sim_config,
                 contact_island_state=self.constraint_solver.contact_island.contact_island_state,
+                errno=self._errno,
             )
         elif isinstance(self.sim.coupler, IPCCoupler):
             # For IPCCoupler, perform full rigid body computation in post-coupling phase
@@ -4399,6 +4402,7 @@ def kernel_step_2(
     rigid_global_info: array_class.RigidGlobalInfo,
     static_rigid_sim_config: ti.template(),
     contact_island_state: array_class.ContactIslandState,
+    errno: array_class.V_ANNOTATION,
 ):
     # Position, Velocity and Acceleration data must be consistent when computing links acceleration, otherwise it
     # would not corresponds to anyting physical. There is no other way than doing this right before integration,
@@ -4457,6 +4461,7 @@ def kernel_step_2(
             dofs_state=dofs_state,
             rigid_global_info=rigid_global_info,
             static_rigid_sim_config=static_rigid_sim_config,
+            errno=errno,
         )
 
         if ti.static(not static_rigid_sim_config.enable_mujoco_compatibility):
@@ -5511,11 +5516,9 @@ def kernel_update_vgeoms(
     _B = links_state.pos.shape[1]
     ti.loop_config(serialize=ti.static(static_rigid_sim_config.para_level < gs.PARA_LEVEL.ALL))
     for i_g, i_b in ti.ndrange(n_vgeoms, _B):
+        i_l = vgeoms_info.link_idx[i_g]
         vgeoms_state.pos[i_g, i_b], vgeoms_state.quat[i_g, i_b] = gu.ti_transform_pos_quat_by_trans_quat(
-            vgeoms_info.pos[i_g],
-            vgeoms_info.quat[i_g],
-            links_state.pos[vgeoms_info.link_idx[i_g], i_b],
-            links_state.quat[vgeoms_info.link_idx[i_g], i_b],
+            vgeoms_info.pos[i_g], vgeoms_info.quat[i_g], links_state.pos[i_l, i_b], links_state.quat[i_l, i_b]
         )
 
 
@@ -6406,14 +6409,9 @@ def func_integrate(
                     else i_0
                 )
 
-                # Prevent nan propagation
-                is_valid = True
-                if ti.static(not BW):
-                    is_valid = ~ti.math.isnan(dofs_state.acc[i_d, i_b])
-                if is_valid:
-                    dofs_state.vel_next[i_d, i_b] = (
-                        dofs_state.vel[i_d, i_b] + dofs_state.acc[i_d, i_b] * rigid_global_info.substep_dt[None]
-                    )
+                dofs_state.vel_next[i_d, i_b] = (
+                    dofs_state.vel[i_d, i_b] + dofs_state.acc[i_d, i_b] * rigid_global_info.substep_dt[None]
+                )
 
     ti.loop_config(serialize=static_rigid_sim_config.para_level < gs.PARA_LEVEL.ALL)
     for i_0, i_b in (
@@ -6520,14 +6518,31 @@ def func_copy_next_to_curr(
     dofs_state: array_class.DofsState,
     rigid_global_info: array_class.RigidGlobalInfo,
     static_rigid_sim_config: ti.template(),
+    errno: array_class.V_ANNOTATION,
 ):
-    ti.loop_config(serialize=static_rigid_sim_config.para_level < gs.PARA_LEVEL.ALL)
-    for I in ti.grouped(ti.ndrange(*dofs_state.vel.shape)):
-        dofs_state.vel[I] = dofs_state.vel_next[I]
+    n_qs = rigid_global_info.qpos.shape[0]
+    n_dofs = dofs_state.vel.shape[0]
+    _B = dofs_state.vel.shape[1]
 
     ti.loop_config(serialize=static_rigid_sim_config.para_level < gs.PARA_LEVEL.ALL)
-    for I in ti.grouped(ti.ndrange(*rigid_global_info.qpos.shape)):
-        rigid_global_info.qpos[I] = rigid_global_info.qpos_next[I]
+    for i_b in range(_B):
+        # Prevent nan propagation
+        is_valid = True
+        for i_d in range(n_dofs):
+            e = dofs_state.vel_next[i_d, i_b]
+            is_valid &= not ti.math.isnan(e)
+        for i_q in range(n_qs):
+            e = rigid_global_info.qpos_next[i_q, i_b]
+            is_valid &= not ti.math.isnan(e)
+
+        if is_valid:
+            for i_d in range(n_dofs):
+                dofs_state.vel[i_d, i_b] = dofs_state.vel_next[i_d, i_b]
+
+            for i_q in range(n_qs):
+                rigid_global_info.qpos[i_q, i_b] = rigid_global_info.qpos_next[i_q, i_b]
+        else:
+            errno[None] = errno[None] | 0b00000000000000000000000000001000
 
 
 @ti.func
@@ -6923,8 +6938,9 @@ def kernel_update_geoms_render_T(
         geom_T = gu.ti_trans_quat_to_T(
             geoms_state.pos[i_g, i_b] + rigid_global_info.envs_offset[i_b], geoms_state.quat[i_g, i_b], EPS
         )
-        for J in ti.static(ti.grouped(ti.ndrange(4, 4))):
-            geoms_render_T[(i_g, i_b, *J)] = ti.cast(geom_T[J], ti.float32)
+        if (ti.abs(geom_T) < 1e20).all():
+            for J in ti.static(ti.grouped(ti.ndrange(4, 4))):
+                geoms_render_T[(i_g, i_b, *J)] = ti.cast(geom_T[J], ti.float32)
 
 
 @ti.kernel(fastcache=gs.use_fastcache)
@@ -6945,8 +6961,9 @@ def kernel_update_vgeoms_render_T(
         geom_T = gu.ti_trans_quat_to_T(
             vgeoms_state.pos[i_g, i_b] + rigid_global_info.envs_offset[i_b], vgeoms_state.quat[i_g, i_b], EPS
         )
-        for J in ti.static(ti.grouped(ti.ndrange(4, 4))):
-            vgeoms_render_T[(i_g, i_b, *J)] = ti.cast(geom_T[J], ti.float32)
+        if (ti.abs(geom_T) < 1e20).all():
+            for J in ti.static(ti.grouped(ti.ndrange(4, 4))):
+                vgeoms_render_T[(i_g, i_b, *J)] = ti.cast(geom_T[J], ti.float32)
 
 
 @ti.kernel(fastcache=gs.use_fastcache)
diff --git a/genesis/utils/misc.py b/genesis/utils/misc.py
@@ -562,7 +562,12 @@ def ti_to_python(
         copy = False
 
     # Leverage zero-copy if enabled
-    batch_shape = value.shape
+    try:
+        batch_shape = value.shape
+    except AttributeError:
+        if isinstance(value, ti.Matrix):
+            raise ValueError("Tensor of type 'ti.Vector', 'ti.Matrix' not supported.")
+        raise
     if use_zerocopy:
         while True:
             try:
diff --git a/tests/test_rigid_physics.py b/tests/test_rigid_physics.py
@@ -316,6 +316,18 @@ def double_ball_pendulum():
     )
     ee = ET.SubElement(link2, "body", name="end_effector", pos="0 0 0.3")
     ET.SubElement(ee, "geom", name="ee_geom", type="sphere", size="0.02", density="200", rgba="1.0 0.8 0.2 1.0")
+    ET.SubElement(
+        ee,
+        "geom",
+        name="marker",
+        type="sphere",
+        contype="0",
+        conaffinity="0",
+        size="0.01",
+        density="0",
+        pos="0 -0.02 0",
+        rgba="0.0 0.0 0.0 1.0",
+    )
 
     return mjcf
 
@@ -1684,7 +1696,13 @@ def test_contact_forces(show_viewer, tol):
 @pytest.mark.required
 @pytest.mark.parametrize("model_name", ["double_ball_pendulum"])
 def test_apply_external_forces(xml_path, show_viewer):
+    GRAVITY = 2.0
+
     scene = gs.Scene(
+        sim_options=gs.options.SimOptions(
+            substeps=2,
+            gravity=(0, 0, -GRAVITY),
+        ),
         viewer_options=gs.options.ViewerOptions(
             camera_pos=(0, -3.5, 2.5),
             camera_lookat=(0.0, 0.0, 1.0),
@@ -1702,29 +1720,67 @@ def test_apply_external_forces(xml_path, show_viewer):
             quat=(1.0, 0, 1.0, 0),
         ),
     )
+    duck = scene.add_entity(
+        morph=gs.morphs.Mesh(
+            file="meshes/duck.obj",
+            scale=0.04,
+            pos=(1.0, 0.0, 1.0),
+            euler=(90, 0, 0),
+            collision=False,
+        ),
+    )
     scene.build()
+    rigid_solver = scene.rigid_solver
 
-    tol = 5e-3
     end_effector_link_idx = robot.links[-1].idx
+    duck_link_idx = duck.links[0].idx
+    duck_mass = duck.get_mass()
     for step in range(801):
-        ee_pos = scene.rigid_solver.get_links_pos([end_effector_link_idx])[0]
+        ee_pos = rigid_solver.get_links_pos([end_effector_link_idx])[0]
+        duck_pos = rigid_solver.get_links_pos([duck_link_idx])[0]
         if step == 0:
-            assert_allclose(ee_pos, [0.8, 0.0, 0.02], tol=tol)
-        elif step == 600:
-            assert_allclose(ee_pos, [0.0, 0.0, 0.82], tol=tol)
+            assert_allclose(ee_pos, (0.8, 0.0, 0.02), tol=1e-4)
+        elif step in (500, 600):
+            assert_allclose(ee_pos, (0.0, 0.0, 0.82), tol=1e-2)
         elif step == 800:
-            assert_allclose(ee_pos, [-0.8 / math.sqrt(2), 0.8 / math.sqrt(2), 0.02], tol=tol)
+            assert_allclose(ee_pos, (-0.8 / math.sqrt(2), 0.8 / math.sqrt(2), 0.02), tol=1e-2)
+        assert_allclose(duck_pos, (1.0, 0.0, 1.0), tol=1e-3)
 
         if step >= 600:
-            force = np.array([[-5.0, 5.0, 0.0]])
-        elif step >= 100:
-            force = np.array([[0.0, 0.0, 10.0]])
+            force = [-4.0, 4.0, 0.0]
+            torque = [0.0, 0.0, 0.0]
+        elif step >= 500:
+            force = [0.0, 0.0, 0.0]
+            torque = [0.0, 0.0, 2.0]
+        elif step >= 50:
+            force = [0.0, 0.0, 10.0]
+            torque = [0.0, 0.0, 0.0]
         else:
-            force = np.array([[0.0, 0.0, 0.0]])
+            force = [0.0, 0.0, 0.0]
+            torque = [0.0, 0.0, 0.0]
 
-        scene.rigid_solver.apply_links_external_force(force=force, links_idx=[end_effector_link_idx])
+        rigid_solver.apply_links_external_force(
+            force=(0, duck_mass * GRAVITY, 0), links_idx=[duck_link_idx], ref="link_com", local=True
+        )
+        rigid_solver.apply_links_external_force(
+            force=force, links_idx=[end_effector_link_idx], ref="link_origin", local=False
+        )
+        rigid_solver.apply_links_external_torque(
+            torque=torque, links_idx=[end_effector_link_idx], ref="link_origin", local=False
+        )
         scene.step()
 
+    rigid_solver.apply_links_external_torque(torque=(0, 1, 0), links_idx=[duck_link_idx], ref="link_com", local=True)
+    assert_allclose(rigid_solver.links_state.cfrc_applied_vel[duck_link_idx, 0].to_numpy(), 0, tol=gs.EPS)
+    assert_allclose(rigid_solver.links_state.cfrc_applied_ang[duck_link_idx, 0].to_numpy(), (0, 0, -1), tol=gs.EPS)
+
+    with np.testing.assert_raises(ValueError):
+        rigid_solver.apply_links_external_force(force=(0, 0, 0), links_idx=[duck_link_idx], ref="root_com", local=True)
+    with np.testing.assert_raises(ValueError):
+        rigid_solver.apply_links_external_torque(
+            torque=(0, 0, 0), links_idx=[duck_link_idx], ref="root_com", local=True
+        )
+
 
 @pytest.mark.required
 def test_mass_mat(show_viewer, tol):