Genesis-Embodied-AI
diff --git a/‎genesis/__init__.py‎
Lines changed: 17 additions & 2 deletions b/‎genesis/__init__.py‎
Lines changed: 17 additions & 2 deletions
diff --git a/‎genesis/engine/entities/rigid_entity/rigid_entity.py‎
Lines changed: 36 additions & 24 deletions b/‎genesis/engine/entities/rigid_entity/rigid_entity.py‎
Lines changed: 36 additions & 24 deletions
diff --git a/‎genesis/engine/entities/rigid_entity/rigid_link.py‎
Lines changed: 1 addition & 1 deletion b/‎genesis/engine/entities/rigid_entity/rigid_link.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎genesis/engine/solvers/rigid/rigid_solver_decomp.py‎
Lines changed: 31 additions & 9 deletions b/‎genesis/engine/solvers/rigid/rigid_solver_decomp.py‎
Lines changed: 31 additions & 9 deletions
@@ -41,6 +41,7 @@
 backend: gs_backend | None = None
 use_ndarray: bool | None = None
 use_fastcache: bool | None = None
+use_zerocopy: bool | None = None
 EPS: float | None = None
 
 
@@ -117,8 +118,8 @@ def init(
         backend = gs_backend.cpu
 
     # Configure GsTaichi fast cache and array type
-    global use_ndarray, use_fastcache
-    is_ndarray_disabled = (os.environ.get("GS_ENABLE_NDARRAY") or ("0" if sys.platform == "darwin" else "1")) == "0"
+    global use_ndarray, use_fastcache, use_zerocopy
+    is_ndarray_disabled = (os.environ.get("GS_ENABLE_NDARRAY") or ("0" if backend == gs_backend.metal else "1")) == "0"
     if use_ndarray is None:
         _use_ndarray = not (is_ndarray_disabled or performance_mode)
     else:
@@ -136,6 +137,20 @@ def init(
             raise_exception("Genesis previous initialized. GsTaichi fast cache mode cannot be disabled anymore.")
     use_ndarray, use_fastcache = _use_ndarray, _use_fastcache
 
+    # Unlike dynamic vs static array mode, and fastcache, zero-copy can be toggled on/off between init without issue.
+    # FIXME: ti.Field does not support zero-copy on Metal for now because of a bug in Torch itself.
+    # See: https://github.com/pytorch/pytorch/pull/168193
+    # FIXME: Zero-copy is currently broken for ti.Field for some reason...
+    _use_zerocopy = int(os.environ["GS_ENABLE_ZEROCOPY"]) if "GS_ENABLE_ZEROCOPY" in os.environ else None
+    if backend in (gs_backend.cpu, gs_backend.cuda):
+        if _use_zerocopy is None:
+            _use_zerocopy = True
+    else:
+        if _use_zerocopy:
+            raise_exception(f"Zero-copy only support by GsTaichi dynamic array mode on CPU and CUDA backend.")
+        _use_zerocopy = False
+    use_zerocopy = _use_zerocopy and _use_ndarray  # (_use_ndarray or backend != gs_backend.metal)
+
     # Define the right dtypes in accordance with selected backend and precision
     global ti_float, np_float, tc_float
     if precision == "32":
 
@@ -561,6 +561,7 @@ def _build(self):
 
         self._n_qs = self.n_qs
         self._n_dofs = self.n_dofs
+        self._n_geoms = self.n_geoms
         self._is_built = True
 
         verts_start = 0
@@ -576,6 +577,8 @@ def _build(self):
             self._free_verts_idx_local = torch.cat(free_verts_idx_local)
         if fixed_verts_idx_local:
             self._fixed_verts_idx_local = torch.cat(fixed_verts_idx_local)
+        self._n_free_verts = len(self._free_verts_idx_local)
+        self._n_fixed_verts = len(self._fixed_verts_idx_local)
 
         self._geoms = self.geoms
         self._vgeoms = self.vgeoms
@@ -1347,19 +1350,13 @@ def inverse_kinematics_multilink(
         )
 
         qpos = ti_to_torch(self._IK_qpos_best, transpose=True)
-        if self._solver.n_envs == 0:
-            qpos = qpos[0].clone()
-        else:
-            qpos = qpos[envs_idx]
+        qpos = qpos[0] if self._solver.n_envs == 0 else qpos[envs_idx]
 
         if return_error:
             error_pose = ti_to_torch(self._IK_err_pose_best, transpose=True).reshape((-1, self._IK_n_tgts, 6))[
                 :, :n_links
             ]
-            if self._solver.n_envs == 0:
-                error_pose = error_pose[0].clone()
-            else:
-                error_pose = error_pose[envs_idx]
+            error_pose = error_pose[0] if self._solver.n_envs == 0 else error_pose[envs_idx]
             return qpos, error_pose
         return qpos
 
@@ -2029,23 +2026,36 @@ def get_verts(self):
         verts : torch.Tensor, shape (n_envs, n_verts, 3)
             The vertices of the entity.
         """
-        self._solver.update_verts_for_geoms(range(self.geom_start, self.geom_end))
+        self._solver.update_verts_for_geoms(slice(self.geom_start, self.geom_end))
 
-        tensor = torch.empty((self._solver._B, self.n_verts, 3), dtype=gs.tc_float, device=gs.device)
-        has_fixed_verts, has_free_vertices = len(self._fixed_verts_idx_local) > 0, len(self._free_verts_idx_local) > 0
-        if has_fixed_verts:
-            _kernel_get_fixed_verts(
-                tensor, self._fixed_verts_idx_local, self._fixed_verts_state_start, self._solver.fixed_verts_state
-            )
-        if has_free_vertices:
-            # FIXME: Get around some bug in gstaichi when using gstaichi with metal backend
-            must_copy = gs.backend == gs.metal and has_fixed_verts
-            tensor_free = torch.zeros_like(tensor) if must_copy else tensor
-            _kernel_get_free_verts(
-                tensor_free, self._free_verts_idx_local, self._free_verts_state_start, self._solver.free_verts_state
-            )
-            if must_copy:
-                tensor += tensor_free
+        n_fixed_verts, n_free_vertices = self._n_fixed_verts, self._n_free_verts
+        tensor = torch.empty((self._solver._B, n_fixed_verts + n_free_vertices, 3), dtype=gs.tc_float, device=gs.device)
+
+        if n_fixed_verts > 0:
+            if gs.use_zerocopy:
+                fixed_verts_state = ti_to_torch(self._solver.fixed_verts_state.pos)
+                tensor[:, self._fixed_verts_idx_local] = fixed_verts_state[
+                    self._fixed_verts_state_start : self._fixed_verts_state_start + n_fixed_verts
+                ]
+            else:
+                _kernel_get_fixed_verts(
+                    tensor, self._fixed_verts_idx_local, self._fixed_verts_state_start, self._solver.fixed_verts_state
+                )
+        if n_free_vertices > 0:
+            if gs.use_zerocopy:
+                free_verts_state = ti_to_torch(self._solver.free_verts_state.pos, transpose=True)
+                tensor[:, self._free_verts_idx_local] = free_verts_state[
+                    :, self._free_verts_state_start : self._free_verts_state_start + n_free_vertices
+                ]
+            else:
+                # FIXME: Get around some bug in gstaichi when using gstaichi with metal backend
+                must_copy = gs.backend == gs.metal and n_fixed_verts > 0
+                tensor_free = torch.zeros_like(tensor) if must_copy else tensor
+                _kernel_get_free_verts(
+                    tensor_free, self._free_verts_idx_local, self._free_verts_state_start, self._solver.free_verts_state
+                )
+                if must_copy:
+                    tensor += tensor_free
 
         if self._solver.n_envs == 0:
             tensor = tensor[0]
@@ -2854,6 +2864,8 @@ def n_dofs(self):
     @property
     def n_geoms(self):
         """The number of `RigidGeom` in the entity."""
+        if self._is_built:
+            return self._n_geoms
         return sum(link.n_geoms for link in self._links)
 
     @property
 
@@ -305,7 +305,7 @@ def get_verts(self):
         """
         Get the vertices of the link's collision body (concatenation of all `link.geoms`) in the world frame.
         """
-        self._solver.update_verts_for_geoms(range(self.geom_start, self.geom_end))
+        self._solver.update_verts_for_geoms(slice(self.geom_start, self.geom_end))
 
         if self.is_fixed and not self._entity._batch_fixed_verts:
             tensor = torch.empty((self.n_verts, 3), dtype=gs.tc_float, device=gs.device)
 
@@ -894,7 +894,13 @@ def substep(self):
             )
 
     def check_errno(self):
-        match kernel_get_errno(self._errno):
+        # Note that errno must be evaluated BEFORE match because otherwise it will be evaluated for each case...
+        # See official documentation: https://docs.python.org/3.10/reference/compound_stmts.html#overview
+        if gs.use_zerocopy:
+            errno = int(ti_to_torch(self._errno, copy=None, non_blocking=True))
+        else:
+            errno = kernel_get_errno(self._errno)
+        match errno:
             case 1:
                 max_collision_pairs_broad = self.collider._collider_info.max_collision_pairs_broad[None]
                 gs.raise_exception(
@@ -1362,8 +1368,10 @@ def _sanitize_1D_io_variables(
         _inputs_idx = torch.as_tensor(inputs_idx, dtype=gs.tc_int, device=gs.device).contiguous()
         if _inputs_idx is not inputs_idx:
             gs.logger.debug(ALLOCATE_TENSOR_WARNING)
-        _inputs_idx = torch.atleast_1d(_inputs_idx)
-        if _inputs_idx.ndim != 1:
+        _inputs_ndim = _inputs_idx.ndim
+        if _inputs_ndim == 0:
+            _inputs_idx = _inputs_idx[None]
+        elif _inputs_ndim > 1:
             gs.raise_exception(f"Expecting 1D tensor for `{idx_name}`.")
         if not ((0 <= _inputs_idx).all() or (_inputs_idx < input_size).all()):
             gs.raise_exception(f"`{idx_name}` is out-of-range.")
@@ -1372,19 +1380,23 @@ def _sanitize_1D_io_variables(
             _tensor = torch.as_tensor(tensor, dtype=gs.tc_float, device=gs.device).contiguous()
             if _tensor is not tensor:
                 gs.logger.debug(ALLOCATE_TENSOR_WARNING)
-            tensor = _tensor.unsqueeze(0) if batched and self.n_envs and _tensor.ndim == 1 else _tensor
-
+            tensor_ndim = _tensor.ndim
+            if batched and self.n_envs and tensor_ndim == 1:
+                tensor = _tensor.unsqueeze(0)
+                tensor_ndim += 1
+            else:
+                tensor = _tensor
             if tensor.shape[-1] != len(inputs_idx):
                 gs.raise_exception(f"Last dimension of the input tensor does not match length of `{idx_name}`.")
 
             if batched:
                 if self.n_envs == 0:
-                    if tensor.ndim != 1:
+                    if tensor_ndim != 1:
                         gs.raise_exception(
                             f"Invalid input shape: {tensor.shape}. Expecting a 1D tensor for non-parallelized scene."
                         )
                 else:
-                    if tensor.ndim == 2:
+                    if tensor_ndim == 2:
                         if tensor.shape[0] != len(envs_idx):
                             gs.raise_exception(
                                 f"Invalid input shape: {tensor.shape}. First dimension of the input tensor does not match "
@@ -1395,7 +1407,7 @@ def _sanitize_1D_io_variables(
                             f"Invalid input shape: {tensor.shape}. Expecting a 2D tensor for scene with parallelized envs."
                         )
             else:
-                if tensor.ndim != 1:
+                if tensor_ndim != 1:
                     gs.raise_exception("Expecting 1D output tensor.")
         return tensor, _inputs_idx, envs_idx
 
@@ -2285,7 +2297,12 @@ def get_equality_constraints(self, as_tensor: bool = True, to_torch: bool = True
         return self.constraint_solver.get_equality_constraints(as_tensor, to_torch)
 
     def clear_external_force(self):
-        kernel_clear_external_force(self.links_state, self._rigid_global_info, self._static_rigid_sim_config)
+        if gs.use_zerocopy:
+            for tensor in (self.links_state.cfrc_applied_ang, self.links_state.cfrc_applied_vel):
+                out = ti_to_python(tensor, copy=False, non_blocking=True)
+                out.zero_()
+        else:
+            kernel_clear_external_force(self.links_state, self._rigid_global_info, self._static_rigid_sim_config)
 
     def update_vgeoms(self):
         kernel_update_vgeoms(self.vgeoms_info, self.vgeoms_state, self.links_state, self._static_rigid_sim_config)
@@ -2320,6 +2337,11 @@ def set_drone_rpm(self, n_propellers, propellers_link_idxs, propellers_rpm, prop
         )
 
     def update_verts_for_geoms(self, geoms_idx):
+        if gs.use_zerocopy:
+            verts_updated = ti_to_torch(self.geoms_state.verts_updated, transpose=False)
+            if verts_updated[geoms_idx].all():
+                return
+
         _, geoms_idx, _ = self._sanitize_1D_io_variables(
             None, geoms_idx, self.n_geoms, None, idx_name="geoms_idx", skip_allocation=True, unsafe=False
         )