|
| 1 | +#include "Sampler.h" |
| 2 | +#include "hoomd/HOOMDMath.h" |
| 3 | +#include "dlpack.h" |
| 4 | +#include <stdexcept> |
| 5 | + |
| 6 | +using namespace std; |
| 7 | +namespace py = pybind11; |
| 8 | + |
// Capsule name the DLPack consumer protocol expects for an unconsumed tensor.
const char* const kDLTensorCapsuleName = "dltensor";
// Bit width of HOOMD's Scalar type (float -> 32, double -> 64).
constexpr uint8_t kBits = std::is_same<Scalar, float>::value ? 32 : 64;
| 11 | + |
| 12 | +template <typename> |
| 13 | +constexpr DLDataType dtype(); |
| 14 | +template <> |
| 15 | +constexpr DLDataType dtype<Scalar4>() { return DLDataType {kDLFloat, kBits, 1}; } |
| 16 | +template <> |
| 17 | +constexpr DLDataType dtype<Scalar3>() { return DLDataType {kDLFloat, kBits, 1}; } |
| 18 | +template <> |
| 19 | +constexpr DLDataType dtype<Scalar>() { return DLDataType {kDLFloat, kBits, 1}; } |
| 20 | +template <> |
| 21 | +constexpr DLDataType dtype<int3>() { return DLDataType {kDLInt, 32, 1}; } |
| 22 | +template <> |
| 23 | +constexpr DLDataType dtype<unsigned int>() { return DLDataType {kDLUInt, 32, 1}; } |
| 24 | +template <> |
| 25 | +constexpr DLDataType dtype<int>() { return DLDataType {kDLInt, 32, 1}; } |
| 26 | + |
| 27 | +template <typename> |
| 28 | +constexpr int64_t stride1(); |
| 29 | +template <> |
| 30 | +constexpr int64_t stride1<Scalar4>() { return 4; } |
| 31 | +template <> |
| 32 | +constexpr int64_t stride1<Scalar3>() { return 3; } |
| 33 | +template <> |
| 34 | +constexpr int64_t stride1<Scalar>() { return 1; } |
| 35 | +template <> |
| 36 | +constexpr int64_t stride1<int3>() { return 3; } |
| 37 | +template <> |
| 38 | +constexpr int64_t stride1<unsigned int>() { return 1; } |
| 39 | + |
// Type-erase a data pointer for storage in DLTensor::data.
template <typename T>
inline void* opaque(T* data)
{
    return static_cast<void*>(data);
}
| 42 | + |
// Wrap a DLManagedTensor pointer in a python capsule named "dltensor", as the
// DLPack consumer protocol requires.
// NOTE(review): no capsule destructor is installed, so the consumer must not
// rely on the capsule to free the tensor; the bridge owning it lives on the
// caller's stack — confirm consumers only use the tensor during the callback.
inline py::capsule encapsulate(DLManagedTensor* dl_managed_tensor)
{
    return py::capsule(dl_managed_tensor, kDLTensorCapsuleName);
}
| 47 | + |
// Construct a Sampler that invokes `python_update` from the half-step hook.
//
// sysdef        — system whose particle data will be exposed to python
// python_update — python callable receiving the DLPack capsules built in
//                 run_on_data
Sampler::Sampler(shared_ptr<SystemDefinition> sysdef,
                 py::function python_update)
    :
    HalfStepHook(),
    m_python_update(python_update)
{
    this->setSystemDefinition(sysdef);
}
| 56 | + |
// Bind this sampler to a system definition, caching the particle data and
// execution configuration it needs on every update.
void Sampler::setSystemDefinition(shared_ptr<SystemDefinition> sysdef)
{
    m_sysdef = sysdef;
    m_pdata = sysdef->getParticleData();   // must precede the line below
    m_exec_conf = m_pdata->getExecConf();
}
| 63 | + |
| 64 | +void Sampler::run_on_data(py::function py_exec, const access_location::Enum location, const access_mode::Enum mode) |
| 65 | +{ |
| 66 | + if(location == access_location::device and not m_exec_conf->isCUDAEnabled()) |
| 67 | + throw runtime_error("Invalid request for device memory in non-cuda run."); |
| 68 | + |
| 69 | + const bool on_device = location == access_location::device; |
| 70 | + |
| 71 | + const ArrayHandle<Scalar4> pos(m_pdata->getPositions(), location, mode); |
| 72 | + auto pos_bridge = wrap<Scalar4, Scalar>(pos.data, on_device, 4 ); |
| 73 | + auto pos_capsule = encapsulate(&pos_bridge.tensor); |
| 74 | + |
| 75 | + const ArrayHandle<Scalar4> vel(m_pdata->getVelocities(), location, mode); |
| 76 | + auto vel_bridge = wrap<Scalar4, Scalar>(vel.data, on_device, 4 ); |
| 77 | + auto vel_capsule = encapsulate(&vel_bridge.tensor); |
| 78 | + |
| 79 | + const ArrayHandle<unsigned int> rtags(m_pdata->getRTags(), location, mode); |
| 80 | + auto rtags_bridge = wrap<unsigned int, unsigned int>(rtags.data, on_device, 1); |
| 81 | + auto rtags_capsule = encapsulate(&rtags_bridge.tensor); |
| 82 | + |
| 83 | + const ArrayHandle<int3> img(m_pdata->getImages(), location, mode); |
| 84 | + auto img_bridge = wrap<int3, int>(img.data, on_device, 3); |
| 85 | + auto img_capsule = encapsulate(&img_bridge.tensor); |
| 86 | + |
| 87 | + ArrayHandle<Scalar4> force(m_pdata->getNetForce(), location, access_mode::readwrite); |
| 88 | + auto force_bridge = wrap<Scalar4, Scalar>(force.data, on_device, 4 ); |
| 89 | + auto force_capsule = encapsulate(&force_bridge.tensor); |
| 90 | + |
| 91 | + py_exec(pos_capsule, vel_capsule, rtags_capsule, img_capsule, force_capsule); |
| 92 | +} |
| 93 | + |
| 94 | +void Sampler::update(unsigned int timestep) |
| 95 | +{ |
| 96 | + |
| 97 | + // Accessing the handles here holds them valid until the block of this function. |
| 98 | + // This keeps them valid for the python function call |
| 99 | + auto location = m_exec_conf->isCUDAEnabled() ? access_location::device : access_location::host; |
| 100 | + |
| 101 | + // const ArrayHandle<Scalar4> pos(m_pdata->getPositions(), location, access_mode::read); |
| 102 | + // auto pos_tensor = wrap<Scalar4, Scalar>(pos.data, 4 ); |
| 103 | + // ArrayHandle<Scalar4> vel(m_pdata->getVelocities(), location, access_mode::read); |
| 104 | + // auto vel_tensor = wrap<Scalar4, Scalar>(vel.data, 4); |
| 105 | + // ArrayHandle<unsigned int> rtags(m_pdata->getRTags(), location, access_mode::read); |
| 106 | + // auto rtag_tensor = wrap<unsigned int, unsigned int>(rtags.data, 1); |
| 107 | + // ArrayHandle<int3> img(m_pdata->getImages(), location, access_mode::read); |
| 108 | + // auto img_tensor = wrap<int3, int>(img.data, 3); |
| 109 | + |
| 110 | + // ArrayHandle<Scalar4> net_forces(m_pdata->getNetForce(), location, access_mode::readwrite); |
| 111 | + // auto force_tensor = wrap<Scalar4, Scalar>(net_forces.data, 4); |
| 112 | + |
| 113 | + // m_python_update(pos_tensor, vel_tensor, rtag_tensor, img_tensor, force_tensor, |
| 114 | + // m_pdata->getGlobalBox()); |
| 115 | + this->run_on_data(m_python_update, location, access_mode::read); |
| 116 | +} |
| 117 | + |
| 118 | +template <typename TV, typename TS> |
| 119 | +DLDataBridge Sampler::wrap(TV* ptr, |
| 120 | + const bool on_device, |
| 121 | + const int64_t size2, |
| 122 | + const uint64_t offset, |
| 123 | + uint64_t stride1_offset) { |
| 124 | + assert((size2 >= 1)); // assert is a macro so the extra parentheses are requiered here |
| 125 | + |
| 126 | + const unsigned int particle_number = this->m_pdata->getN(); |
| 127 | + const int gpu_id = on_device ? m_exec_conf->getGPUIds()[0] : m_exec_conf->getRank(); |
| 128 | + |
| 129 | + DLDataBridge bridge; |
| 130 | + bridge.tensor.manager_ctx = NULL; |
| 131 | + bridge.tensor.deleter = NULL; |
| 132 | + |
| 133 | + bridge.tensor.dl_tensor.data = opaque(ptr); |
| 134 | + bridge.tensor.dl_tensor.ctx = DLContext{on_device ? kDLGPU : kDLCPU, gpu_id}; |
| 135 | + bridge.tensor.dl_tensor.dtype = dtype<TS>(); |
| 136 | + |
| 137 | + bridge.shape.push_back(particle_number); |
| 138 | + if (size2 > 1) |
| 139 | + bridge.shape.push_back(size2); |
| 140 | + |
| 141 | + bridge.strides.push_back(stride1<TV>() + stride1_offset); |
| 142 | + if (size2 > 1) |
| 143 | + bridge.strides.push_back(1); |
| 144 | + |
| 145 | + bridge.tensor.dl_tensor.ndim = bridge.shape.size(); |
| 146 | + bridge.tensor.dl_tensor.dtype = dtype<TS>(); |
| 147 | + bridge.tensor.dl_tensor.shape = reinterpret_cast<std::int64_t*>(bridge.shape.data()); |
| 148 | + bridge.tensor.dl_tensor.strides = reinterpret_cast<std::int64_t*>(bridge.strides.data()); |
| 149 | + bridge.tensor.dl_tensor.byte_offset = offset; |
| 150 | + |
| 151 | + return bridge; |
| 152 | +} |
| 153 | + |
| 154 | + |
// Expose Sampler to python as "DLextSampler", deriving from HalfStepHook so
// the simulation loop can call update() each half step.
// NOTE(review): py::base<> is deprecated (removed in pybind11 >= 2.2); newer
// pybind11 lists the base as a template argument of py::class_. Left as-is
// pending a check of the project's pinned pybind11 version.
void export_Sampler(py::module& m)
{
    py::class_<Sampler, std::shared_ptr<Sampler> >(m, "DLextSampler", py::base<HalfStepHook>())
        .def(py::init<std::shared_ptr<SystemDefinition>, py::function>())
        .def("run_on_data", &Sampler::run_on_data)
        ;
}
0 commit comments