@@ -174,6 +174,55 @@ inline void memset(T* ptr, int value, std::size_t count)
174174 }
175175}
176176
177+ /* !
178+ * \brief device kernel to set elements to a value.
179+ */
180+ template <typename T>
181+ __global__ void umpire_device_memset_kernel (T* data, int value, std::size_t size)
182+ {
183+ const std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x ;
184+ const std::size_t stride = blockDim.x * gridDim.x ;
185+
186+ for (std::size_t i = idx; i < size; i += stride) {
187+ data[i] = value;
188+ }
189+ }
190+
191+ /* *
192+ * @brief Synchronous memory set implementation in device kernel
193+ *
194+ * @tparam T Type of memory
195+ * @param ptr Pointer to memory
196+ * @param value Value to set
197+ * @param count Number of elements
198+ */
199+ template <typename T>
200+ inline void device_memset (T* ptr, int value, std::size_t count)
201+ {
202+ std::size_t size = detail::get_size<T>(count);
203+
204+ if (!ptr || size == 0 ) {
205+ return ;
206+ }
207+
208+ constexpr int block_size = 256 ;
209+ std::size_t grid_size = (size + block_size - 1 ) / block_size;
210+
211+ const std::size_t max_blocks = 65535 ;
212+ if (grid_size > max_blocks) {
213+ grid_size = max_blocks;
214+ }
215+
216+ hipLaunchKernelGGL (umpire_device_memset_kernel, dim3 (grid_size), dim3 (block_size), 0 , 0 , ptr, value, size);
217+
218+ hipError_t err = hipGetLastError ();
219+ if (err != hipSuccess) {
220+ UMPIRE_ERROR (runtime_error,
221+ fmt::format (" device_memset kernel launch failed: {}" , hipGetErrorString (err)));
222+ }
223+
224+ }
225+
177226/* *
178227 * @brief Asynchronous memory set implementation
179228 *
@@ -424,6 +473,24 @@ struct memset<resource::hip_platform> {
424473 }
425474};
426475
476+ // HIP device memset operation
477+ template <>
478+ struct device_memset <resource::hip_platform> {
479+ /* *
480+ * @brief HIP synchronous device memset
481+ *
482+ * @tparam T Type of memory being set
483+ * @param ptr Pointer to memory
484+ * @param val Value to set
485+ * @param len Number of elements to set
486+ */
487+ template <typename T>
488+ static void exec (T* ptr, int val, std::size_t len) noexcept
489+ {
490+ detail::device_memset (ptr, val, len);
491+ }
492+ };
493+
427494// Note: HIP platform uses the generic reallocate implementation from operations.hpp
428495// since direct HIP reallocation isn't supported and memory pools require a safe allocate-copy-free pattern
429496
0 commit comments