Skip to content

Commit 859ffeb

Browse files
authored
[alpaka] Support all alpaka backends at the same time (#357)
Update the develop branch to 2022.04.27 / 879b95ffce2. Use new pinned host memory functionality. Add forward declarations for alpaka templates and types. Support serial, TBB, CUDA and ROCm at the same time, with static splitting of event streams across multiple backends. Autogenerate plugins.txt.
2 parents 13cbf58 + f56ac8b commit 859ffeb

30 files changed

+788
-457
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -583,8 +583,8 @@ $(HWLOC_BASE):
583583
external_alpaka: $(ALPAKA_BASE)
584584

585585
$(ALPAKA_BASE):
586-
git clone git@github.com:alpaka-group/alpaka.git -b 0.9.0-rc1 $@
587-
cd $@ && git checkout ebc1171feac21f1e21c49bcd9f053e7b01b584d0
586+
git clone git@github.com:alpaka-group/alpaka.git -b develop $@
587+
cd $@ && git checkout 879b95ffce2da499c9cc6e12d4cfd5545effa701
588588

589589
# Kokkos
590590
external_kokkos: $(KOKKOS_LIB)

src/alpaka/AlpakaCore/CachingAllocator.h

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ namespace cms::alpakatools {
9090
using Event = alpaka::Event<Queue>; // the events used to synchronise the operations
9191
using Buffer = alpaka::Buf<Device, std::byte, alpaka::DimInt<1u>, size_t>;
9292

93+
// The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
94+
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
95+
"The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be the "
96+
"host CPU.");
97+
9398
struct CachedBytes {
9499
size_t free = 0; // total bytes freed and cached on this device
95100
size_t live = 0; // total bytes currently in use on this device
@@ -311,11 +316,24 @@ namespace cms::alpakatools {
311316
return false;
312317
}
313318

319+
Buffer allocateBuffer(size_t bytes, Queue const& queue) {
320+
if constexpr (std::is_same_v<Device, alpaka::Dev<Queue>>) {
321+
// allocate device memory
322+
return alpaka::allocBuf<std::byte, size_t>(device_, bytes);
323+
} else if constexpr (std::is_same_v<Device, alpaka::DevCpu>) {
324+
// allocate pinned host memory
325+
return alpaka::allocMappedBuf<std::byte, size_t>(device_, alpaka::getDev(queue), bytes);
326+
} else {
327+
// unsupported combination
328+
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
329+
"The \"memory device\" type can either be the same as the \"synchronisation device\" type, or be "
330+
"the host CPU.");
331+
}
332+
}
333+
314334
void allocateNewBlock(BlockDescriptor& block) {
315335
try {
316-
// FIXME simplify alpaka::Vec<alpaka::DimInt<1u>, size_t>{block.bytes} to block.bytes ?
317-
block.buffer =
318-
alpaka::allocBuf<std::byte, size_t>(device_, alpaka::Vec<alpaka::DimInt<1u>, size_t>{block.bytes});
336+
block.buffer = allocateBuffer(block.bytes, *block.queue);
319337
} catch (std::runtime_error const& e) {
320338
// the allocation attempt failed: free all cached blocks on the device and retry
321339
if (debug_) {
@@ -329,25 +347,8 @@ namespace cms::alpakatools {
329347
freeAllCached();
330348

331349
// throw an exception if it fails again
332-
block.buffer =
333-
alpaka::allocBuf<std::byte, size_t>(device_, alpaka::Vec<alpaka::DimInt<1u>, size_t>{block.bytes});
334-
}
335-
336-
// for host memory, pin the newly allocated block
337-
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
338-
if (not cms::alpakatools::devices<alpaka::PltfCudaRt>.empty()) {
339-
// it is possible to initialise the CUDA runtime and call cudaHostRegister
340-
// only if the system has at least one supported GPU
341-
alpaka::prepareForAsyncCopy(*block.buffer);
342-
}
343-
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
344-
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
345-
if (not cms::alpakatools::devices<alpaka::PltfHipRt>.empty()) {
346-
// it is possible to initialise the ROCm runtime and call hipHostRegister
347-
// only if the system has at least one supported GPU
348-
alpaka::prepareForAsyncCopy(*block.buffer);
350+
block.buffer = allocateBuffer(block.bytes, *block.queue);
349351
}
350-
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
351352

352353
// create a new event associated to the "synchronisation device"
353354
block.event = Event{block.device()};

src/alpaka/AlpakaCore/HostOnlyTask.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ namespace alpaka {
2424
//! The CUDA async queue enqueue trait specialization for "safe tasks"
2525
template <>
2626
struct Enqueue<QueueCudaRtNonBlocking, HostOnlyTask> {
27+
using TApi = ApiCudaRt;
28+
2729
static void CUDART_CB callback(cudaStream_t /*queue*/, cudaError_t /*status*/, void* arg) {
2830
//ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(status);
2931
std::unique_ptr<HostOnlyTask> pTask(static_cast<HostOnlyTask*>(arg));
@@ -42,7 +44,9 @@ namespace alpaka {
4244
//! The HIP async queue enqueue trait specialization for "safe tasks"
4345
template <>
4446
struct Enqueue<QueueHipRtNonBlocking, HostOnlyTask> {
45-
static void HIPRT_CB callback(hipStream_t /*queue*/, hipError_t /*status*/, void* arg) {
47+
using TApi = ApiHipRt;
48+
49+
static void callback(hipStream_t /*queue*/, hipError_t /*status*/, void* arg) {
4650
//ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(status);
4751
std::unique_ptr<HostOnlyTask> pTask(static_cast<HostOnlyTask*>(arg));
4852
(*pTask)();
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#include <iostream>
2+
3+
#include <alpaka/alpaka.hpp>
4+
5+
#include "AlpakaCore/alpakaConfig.h"
6+
#include "AlpakaCore/alpakaDevices.h"
7+
#include "AlpakaCore/initialise.h"
8+
#include "Framework/demangle.h"
9+
10+
namespace cms::alpakatools {
11+
12+
template <typename TPlatform>
13+
void initialise() {
14+
constexpr const char* suffix[] = {"devices.", "device:", "devices:"};
15+
16+
if (devices<TPlatform>.empty()) {
17+
devices<TPlatform> = enumerate<TPlatform>();
18+
auto size = devices<TPlatform>.size();
19+
//std::cout << edm::demangle<TPlatform> << " platform successfully initialised." << std::endl;
20+
std::cout << "Found " << size << " " << suffix[size < 2 ? size : 2] << std::endl;
21+
for (auto const& device : devices<TPlatform>) {
22+
std::cout << " - " << alpaka::getName(device) << std::endl;
23+
}
24+
} else {
25+
//std::cout << edm::demangle<TPlatform> << " platform already initialised." << std::endl;
26+
}
27+
}
28+
29+
// explicit template instantiation definition
30+
template void initialise<ALPAKA_ACCELERATOR_NAMESPACE::Platform>();
31+
32+
} // namespace cms::alpakatools

src/alpaka/AlpakaCore/alpakaConfig.h

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
#ifndef AlpakaCore_alpakaConfig_h
22
#define AlpakaCore_alpakaConfig_h
33

4-
#include <type_traits>
5-
6-
#include <alpaka/alpaka.hpp>
4+
#include "AlpakaCore/alpakaFwd.h"
75

86
namespace alpaka_common {
97

@@ -32,7 +30,7 @@ namespace alpaka_common {
3230

3331
// host types
3432
using DevHost = alpaka::DevCpu;
35-
using PltfHost = alpaka::Pltf<DevHost>;
33+
using PltfHost = alpaka::PltfCpu;
3634

3735
} // namespace alpaka_common
3836

@@ -44,7 +42,7 @@ namespace alpaka_common {
4442
#define DEFINE_FWK_ALPAKA_EVENTSETUP_MODULE(name) \
4543
DEFINE_FWK_ALPAKA_EVENTSETUP_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::name)
4644

47-
#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
45+
#ifdef ALPAKA_ACC_GPU_CUDA_PRESENT
4846
namespace alpaka_cuda_async {
4947
using namespace alpaka_common;
5048

@@ -61,13 +59,13 @@ namespace alpaka_cuda_async {
6159

6260
} // namespace alpaka_cuda_async
6361

64-
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
62+
#endif // ALPAKA_ACC_GPU_CUDA_PRESENT
6563

6664
#ifdef ALPAKA_ACC_GPU_CUDA_ASYNC_BACKEND
6765
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_cuda_async
6866
#endif // ALPAKA_ACC_GPU_CUDA_ASYNC_BACKEND
6967

70-
#ifdef ALPAKA_ACC_GPU_HIP_ENABLED
68+
#ifdef ALPAKA_ACC_GPU_HIP_PRESENT
7169
namespace alpaka_rocm_async {
7270
using namespace alpaka_common;
7371

@@ -84,13 +82,13 @@ namespace alpaka_rocm_async {
8482

8583
} // namespace alpaka_rocm_async
8684

87-
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
85+
#endif // ALPAKA_ACC_GPU_HIP_PRESENT
8886

8987
#ifdef ALPAKA_ACC_GPU_HIP_ASYNC_BACKEND
9088
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_rocm_async
9189
#endif // ALPAKA_ACC_GPU_HIP_ASYNC_BACKEND
9290

93-
#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
91+
#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
9492
namespace alpaka_serial_sync {
9593
using namespace alpaka_common;
9694

@@ -107,13 +105,13 @@ namespace alpaka_serial_sync {
107105

108106
} // namespace alpaka_serial_sync
109107

110-
#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED
108+
#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_PRESENT
111109

112110
#ifdef ALPAKA_ACC_CPU_B_SEQ_T_SEQ_SYNC_BACKEND
113111
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_serial_sync
114112
#endif // ALPAKA_ACC_CPU_B_SEQ_T_SEQ_SYNC_BACKEND
115113

116-
#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
114+
#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
117115
namespace alpaka_tbb_async {
118116
using namespace alpaka_common;
119117

@@ -130,13 +128,13 @@ namespace alpaka_tbb_async {
130128

131129
} // namespace alpaka_tbb_async
132130

133-
#endif // ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
131+
#endif // ALPAKA_ACC_CPU_B_TBB_T_SEQ_PRESENT
134132

135133
#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ASYNC_BACKEND
136134
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_tbb_async
137135
#endif // ALPAKA_ACC_CPU_B_TBB_T_SEQ_ASYNC_BACKEND
138136

139-
#ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
137+
#ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_PRESENT
140138
namespace alpaka_omp2_async {
141139
using namespace alpaka_common;
142140

@@ -153,7 +151,7 @@ namespace alpaka_omp2_async {
153151

154152
} // namespace alpaka_omp2_async
155153

156-
#endif // ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED
154+
#endif // ALPAKA_ACC_CPU_B_OMP2_T_SEQ_PRESENT
157155

158156
#ifdef ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ASYNC_BACKEND
159157
#define ALPAKA_ACCELERATOR_NAMESPACE alpaka_omp2_async

src/alpaka/AlpakaCore/alpakaFwd.h

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#ifndef AlpakaCore_alpakaFwd_h
2+
#define AlpakaCore_alpakaFwd_h
3+
4+
#include <cstddef>
5+
#include <cstdint>
6+
#include <type_traits>
7+
8+
/**
9+
* This file forward declares specific types defined in Alpaka
10+
* (depending on the backend-enabling macros) so that these types
11+
* would be available throughout CMSSW without a direct dependence on
12+
* Alpaka in order to avoid the constraints that would impose
13+
* (primarily the device compiler)
14+
*
15+
* This is a little bit brittle, but let's see how it goes.
16+
*/
17+
namespace alpaka {
18+
19+
// miscellanea
20+
template <std::size_t N>
21+
using DimInt = std::integral_constant<std::size_t, N>;
22+
23+
template <typename TDim, typename TVal>
24+
class Vec;
25+
26+
template <typename TDim, typename TIdx>
27+
class WorkDivMembers;
28+
29+
// API
30+
struct ApiCudaRt;
31+
struct ApiHipRt;
32+
33+
// Platforms
34+
class PltfCpu;
35+
template <typename TApi>
36+
class PltfUniformCudaHipRt;
37+
using PltfCudaRt = PltfUniformCudaHipRt<ApiCudaRt>;
38+
using PltfHipRt = PltfUniformCudaHipRt<ApiHipRt>;
39+
40+
// Devices
41+
class DevCpu;
42+
template <typename TApi>
43+
class DevUniformCudaHipRt;
44+
using DevCudaRt = DevUniformCudaHipRt<ApiCudaRt>;
45+
using DevHipRt = DevUniformCudaHipRt<ApiHipRt>;
46+
47+
// Queues
48+
template <typename TDev>
49+
class QueueGenericThreadsBlocking;
50+
using QueueCpuBlocking = QueueGenericThreadsBlocking<DevCpu>;
51+
52+
template <typename TDev>
53+
class QueueGenericThreadsNonBlocking;
54+
using QueueCpuNonBlocking = QueueGenericThreadsNonBlocking<DevCpu>;
55+
56+
namespace uniform_cuda_hip::detail {
57+
template <typename TApi, bool TBlocking>
58+
class QueueUniformCudaHipRt;
59+
}
60+
using QueueCudaRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, true>;
61+
using QueueCudaRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, false>;
62+
using QueueHipRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, true>;
63+
using QueueHipRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, false>;
64+
65+
// Events
66+
template <typename TDev>
67+
class EventGenericThreads;
68+
using EventCpu = EventGenericThreads<DevCpu>;
69+
70+
template <typename TApi>
71+
class EventUniformCudaHipRt;
72+
using EventCudaRt = EventUniformCudaHipRt<ApiCudaRt>;
73+
using EventHipRt = EventUniformCudaHipRt<ApiHipRt>;
74+
75+
// Accelerators
76+
template <typename TApi, typename TDim, typename TIdx>
77+
class AccGpuUniformCudaHipRt;
78+
79+
template <typename TDim, typename TIdx>
80+
using AccGpuCudaRt = AccGpuUniformCudaHipRt<ApiCudaRt, TDim, TIdx>;
81+
82+
template <typename TDim, typename TIdx>
83+
using AccGpuHipRt = AccGpuUniformCudaHipRt<ApiHipRt, TDim, TIdx>;
84+
85+
template <typename TDim, typename TIdx>
86+
class AccCpuSerial;
87+
88+
template <typename TDim, typename TIdx>
89+
class AccCpuTbbBlocks;
90+
91+
template <typename TDim, typename TIdx>
92+
class AccCpuOmp2Blocks;
93+
94+
} // namespace alpaka
95+
96+
#endif // AlpakaCore_alpakaFwd_h

0 commit comments

Comments
 (0)