
Commit a6bbf4a

Don't use dl_tensor.byte_offset when exporting capsules. (#21153)
PyTorch ignores byte_offset, at least in some circumstances, which means the wrong data ends up being exported. This fixes the issue by baking the byte_offset directly into the returned data pointer. See: https://github.com/dmlc/dlpack/blob/7f393bbb86a0ddd71fde3e700fc2affa5cdce72d/include/dlpack/dlpack.h#L225

Signed-off-by: Andrew Woloszyn <[email protected]>
1 parent 1110ac1 commit a6bbf4a

File tree

1 file changed (+6, -2 lines)

runtime/bindings/python/hal.cc

Lines changed: 6 additions & 2 deletions
@@ -739,9 +739,13 @@ py::object HalDevice::CreateDLPackCapsule(HalBufferView& buffer_view,
       "Cannot export device buffer");
   static_assert(sizeof(dl_tensor.data) >=
                 sizeof(external_buffer.handle.device_allocation.ptr));
+  // Set the data pointer to the offset, and the byte_offset to 0.
+  // This SHOULD not be required, but some backends (torch GPU for example),
+  // ignore the byte_offset entirely.
   dl_tensor.data =
-      reinterpret_cast<void*>(external_buffer.handle.device_allocation.ptr);
-  dl_tensor.byte_offset = offset;
+      reinterpret_cast<uint8_t*>(external_buffer.handle.device_allocation.ptr) +
+      offset;
+  dl_tensor.byte_offset = 0;
 
   // Create and return capsule.
   PyObject* capsule = PyCapsule_New(static_cast<DLManagedTensor*>(tensor.get()),
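
To see why folding the offset into the pointer is robust, here is a minimal, self-contained sketch. It is not IREE code: MiniDLTensor is a hypothetical stand-in for the two DLTensor fields involved, and the two producer styles correspond to the old and new behavior in this commit. A spec-compliant consumer reads from data + byte_offset; an offset-ignoring consumer (the PyTorch behavior described in the commit message) reads from data alone.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the two DLTensor fields that matter here; the
// real struct lives in dlpack/dlpack.h and has more members.
struct MiniDLTensor {
  void* data;
  uint64_t byte_offset;
};

// What the DLPack spec prescribes: tensor contents start at data + byte_offset.
static const uint8_t* EffectiveStart(const MiniDLTensor& t) {
  return static_cast<const uint8_t*>(t.data) + t.byte_offset;
}

int main() {
  uint8_t storage[16] = {0};
  storage[8] = 42;  // the byte the exported view should expose
  const uint64_t offset = 8;

  // Old behavior: base pointer plus a separate byte_offset. Correct per the
  // spec, but a consumer that ignores byte_offset reads storage[0] instead.
  MiniDLTensor spec_style{storage, offset};

  // New behavior (this commit): fold the offset into data, set byte_offset = 0.
  // Both kinds of consumers now land on the same byte.
  MiniDLTensor folded_style{storage + offset, 0};

  std::printf("spec-compliant consumer, old style: %d\n", *EffectiveStart(spec_style));
  std::printf("spec-compliant consumer, new style: %d\n", *EffectiveStart(folded_style));
  std::printf("offset-ignoring consumer, old style: %d\n",
              *static_cast<const uint8_t*>(spec_style.data));    // wrong byte: 0
  std::printf("offset-ignoring consumer, new style: %d\n",
              *static_cast<const uint8_t*>(folded_style.data));  // 42
  return 0;
}

Run as written, the old style prints 0 for the offset-ignoring consumer while the new style prints 42 for both readers, which is the mismatch the commit message describes.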
