Skip to content

Commit 7ad2852

Browse files
committed
WIP
1 parent 0565082 commit 7ad2852

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

include/umpire/op/hip.hpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,55 @@ inline void memset(T* ptr, int value, std::size_t count)
174174
}
175175
}
176176

177+
/*!
178+
* \brief device kernel to set elements to a value.
179+
*/
180+
template <typename T>
181+
__global__ void umpire_device_memset_kernel(T* data, int value, std::size_t size)
182+
{
183+
const std::size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
184+
const std::size_t stride = blockDim.x * gridDim.x;
185+
186+
for (std::size_t i = idx; i < size; i += stride) {
187+
data[i] = value;
188+
}
189+
}
190+
191+
/**
192+
* @brief Synchronous memory set implementation in device kernel
193+
*
194+
* @tparam T Type of memory
195+
* @param ptr Pointer to memory
196+
* @param value Value to set
197+
* @param count Number of elements
198+
*/
199+
template <typename T>
200+
inline void device_memset(T* ptr, int value, std::size_t count)
201+
{
202+
std::size_t size = detail::get_size<T>(count);
203+
204+
if (!ptr || size == 0) {
205+
return;
206+
}
207+
208+
constexpr int block_size = 256;
209+
std::size_t grid_size = (size + block_size - 1) / block_size;
210+
211+
const std::size_t max_blocks = 65535;
212+
if (grid_size > max_blocks) {
213+
grid_size = max_blocks;
214+
}
215+
216+
hipLaunchKernelGGL(umpire_device_memset_kernel, dim3(grid_size), dim3(block_size), 0, 0, ptr, value, size);
217+
218+
hipError_t err = hipGetLastError();
219+
if (err != hipSuccess) {
220+
UMPIRE_ERROR(runtime_error,
221+
fmt::format("device_memset kernel launch failed: {}", hipGetErrorString(err)));
222+
}
223+
224+
}
225+
177226
/**
178227
* @brief Asynchronous memory set implementation
179228
*
@@ -424,6 +473,24 @@ struct memset<resource::hip_platform> {
424473
}
425474
};
426475

476+
// HIP device memset operation
477+
template <>
478+
struct device_memset<resource::hip_platform> {
479+
/**
480+
* @brief HIP synchronous device memset
481+
*
482+
* @tparam T Type of memory being set
483+
* @param ptr Pointer to memory
484+
* @param val Value to set
485+
* @param len Number of elements to set
486+
*/
487+
template <typename T>
488+
static void exec(T* ptr, int val, std::size_t len) noexcept
489+
{
490+
detail::device_memset(ptr, val, len);
491+
}
492+
};
493+
427494
// Note: HIP platform uses the generic reallocate implementation from operations.hpp
428495
// since direct HIP reallocation isn't supported and memory pools require a safe allocate-copy-free pattern
429496

include/umpire/op/operations.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ struct memset : public operation {
2828
static constexpr const char* name = "MEMSET";
2929
};
3030

31+
template <typename Src>
32+
struct device_memset : public operation {
33+
static constexpr int arity = 1;
34+
static constexpr const char* name = "DEVICE_MEMSET";
35+
};
36+
3137
template <typename Src>
3238
struct reallocate : public operation {
3339
static constexpr int arity = 1;

0 commit comments

Comments
 (0)