@@ -225,51 +225,76 @@ namespace pyAMReX
         */


-        // DLPack protocol (CPU, NVIDIA GPU, AMD GPU, Intel GPU, etc.)
+        // DLPack v1.1 protocol (CPU, NVIDIA GPU, AMD GPU, Intel GPU, etc.)
         // https://dmlc.github.io/dlpack/latest/
         // https://github.com/dmlc/dlpack/blob/master/include/dlpack/dlpack.h
         // https://docs.cupy.dev/en/stable/user_guide/interoperability.html#dlpack-data-exchange-protocol
-        .def("__dlpack__", [](Array4<T> const &a4, [[maybe_unused]] py::handle stream = py::none()) {
+        .def("__dlpack__", [](
+            Array4<T> const &a4
+            /* TODO:
+            [[maybe_unused]] py::handle stream,
+            [[maybe_unused]] std::tuple<int, int> max_version,
+            [[maybe_unused]] std::tuple<DLDeviceType, int32_t> dl_device,
+            [[maybe_unused]] bool copy
+            */
+        )
+        {
             // Allocate shape/strides arrays
             constexpr int ndim = 4;
             auto const len = length(a4);
-            auto *shape = new int64_t[ndim]{a4.nComp(), len.z, len.y, len.x};
-            auto *strides = new int64_t[ndim]{a4.nstride, a4.kstride, a4.jstride, 1};
-
-            // Construct DLTensor
-            auto *dl_tensor = new DLManagedTensor;
-            dl_tensor->dl_tensor.data = const_cast<void *>(static_cast<const void *>(a4.dataPtr()));
-            dl_tensor->dl_tensor.device = dlpack::detect_device_from_pointer(a4.dataPtr());
-            dl_tensor->dl_tensor.ndim = ndim;
-            dl_tensor->dl_tensor.dtype = dlpack::get_dlpack_dtype<T>();
-            dl_tensor->dl_tensor.shape = shape;
-            dl_tensor->dl_tensor.strides = strides;
-            dl_tensor->dl_tensor.byte_offset = 0;
-            dl_tensor->manager_ctx = nullptr;
-            dl_tensor->deleter = [](DLManagedTensor *self) {
+
+            // Construct DLManagedTensorVersioned (DLPack 1.1 standard)
+            auto *dl_mgt_tensor = new DLManagedTensorVersioned;
+            // dl_mgt_tensor->version = DLPackVersion{};
+            dl_mgt_tensor->version.major = 1;
+            dl_mgt_tensor->version.minor = 1;
+            dl_mgt_tensor->flags = 0;  // No special flags
+            dl_mgt_tensor->dl_tensor.data = const_cast<void *>(static_cast<const void *>(a4.dataPtr()));
+            dl_mgt_tensor->dl_tensor.device = dlpack::detect_device_from_pointer(a4.dataPtr());
+            dl_mgt_tensor->dl_tensor.ndim = ndim;
+            dl_mgt_tensor->dl_tensor.dtype = dlpack::get_dlpack_dtype<T>();
+            dl_mgt_tensor->dl_tensor.shape = new int64_t[ndim]{a4.nComp(), len.z, len.y, len.x};
+            dl_mgt_tensor->dl_tensor.strides = new int64_t[ndim]{a4.nstride, a4.kstride, a4.jstride, 1};
+            dl_mgt_tensor->dl_tensor.byte_offset = 0;
+            dl_mgt_tensor->manager_ctx = nullptr;  // TODO: we can increase/decrease the Python ref counter of the producer here
+            dl_mgt_tensor->deleter = [](DLManagedTensorVersioned *self) {
                 delete[] self->dl_tensor.shape;
                 delete[] self->dl_tensor.strides;
                 delete self;
             };
             // Return as Python capsule
-            return py::capsule(dl_tensor, "dltensor", [](void* ptr) {
-                auto* tensor = static_cast<DLManagedTensor*>(ptr);
-                tensor->deleter(tensor);
-            });
+            return py::capsule(
+                dl_mgt_tensor,
+                "dltensor",
+                /* [](void* ptr) {
+                    auto* tensor = static_cast<DLManagedTensorVersioned*>(ptr);
+                    tensor->deleter(tensor);
+                }*/
+                [](PyObject *capsule)
+                {
+                    auto *p = static_cast<DLManagedTensorVersioned*>(
+                        PyCapsule_GetPointer(capsule, "dltensor"));
+                    if (p && p->deleter)
+                        p->deleter(p);
+                }
+            );
         },
-        py::arg("stream") = py::none(),
+        // py::arg("stream") = py::none(),
+        // ... other args & their defaults
         R"doc(
             DLPack protocol for zero-copy tensor exchange.
             See https://dmlc.github.io/dlpack/latest/ for details.
         )doc"
         )
         .def("__dlpack_device__", [](Array4<T> const &a4) {
             DLDevice device = dlpack::detect_device_from_pointer(a4.dataPtr());
-            return std::make_tuple(device.device_type, device.device_id);
+            return std::make_tuple(static_cast<int32_t>(device.device_type), device.device_id);
         }, R"doc(
             DLPack device info (device_type, device_id).
         )doc")

+
+
         .def("to_host", [](Array4<T> const & a4) {
             // py::tuple to std::vector
             auto const a4i = pyAMReX::array_interface(a4);
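
Below is a minimal consumer-side sketch of how the new __dlpack__ / __dlpack_device__ bindings are meant to be exercised from Python once the remaining TODO arguments are wired up. The setup code (amrex.space3d, MultiFab, mf.array(mfi)) follows the usual pyAMReX examples and is an assumption here, not part of this diff; np.from_dlpack() is the standard NumPy entry point that calls __dlpack_device__() and __dlpack__() on the producer.

    # Hypothetical usage sketch (names assumed, not defined in this diff)
    import numpy as np
    import amrex.space3d as amr

    amr.initialize([])

    box = amr.Box(amr.IntVect(0, 0, 0), amr.IntVect(31, 31, 31))
    ba = amr.BoxArray(box)
    dm = amr.DistributionMapping(ba)
    mf = amr.MultiFab(ba, dm, 1, 0)   # 1 component, no ghost cells
    mf.set_val(1.0)

    for mfi in mf:
        a4 = mf.array(mfi)            # Array4<double> binding (producer)

        # DLPack device info arrives as plain (device_type, device_id) integers
        dev_type, dev_id = a4.__dlpack_device__()

        # Zero-copy import: NumPy consumes the "dltensor" capsule produced above
        view = np.from_dlpack(a4)     # expected shape: (ncomp, nz, ny, nx)

    amr.finalize()

The static_cast<int32_t> in __dlpack_device__ is what makes dev_type come back as a plain Python int here rather than requiring an enum binding for DLDeviceType.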