Skip to content

Commit c4a3cb3

Browse files
kmuseth and matthewdcong
authored and committed
Device buffer from host buffer
* minor cleanup Signed-off-by: Ken <[email protected]>
* construct DeviceBuffer from HostBuffer Signed-off-by: Ken <[email protected]>
* added HostBuffer::data<T>(int i) Signed-off-by: Ken <[email protected]>
* improved unit-test Signed-off-by: Ken <[email protected]>
* snapshot Signed-off-by: Ken <[email protected]>
* removed whitespace Signed-off-by: Ken <[email protected]>
---------
Signed-off-by: Ken <[email protected]>
Signed-off-by: Matthew Cong <[email protected]>
1 parent 3917418 commit c4a3cb3

File tree

3 files changed

+104
-1
lines changed

3 files changed

+104
-1
lines changed

nanovdb/nanovdb/HostBuffer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,13 @@ class HostBuffer
192192
void* data() { return mData; }
193193
//@}
194194

195+
/// @brief Returns an offset pointer of a specific type from the allocated host memory
196+
/// @tparam T Type of the pointer returned
197+
/// @param count Numbers of elements of @c parameter type T to skip
198+
/// @warning might return NULL
199+
template <typename T>
200+
T* data(ptrdiff_t count = 0) const {return mData ? reinterpret_cast<T*>(mData) + count : nullptr;}
201+
195202
//@{
196203
/// @brief Returns the size in bytes associated with this buffer.
197204
uint64_t bufferSize() const { return mSize; }

nanovdb/nanovdb/cuda/DeviceBuffer.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,20 @@ class DeviceBuffer
127127
other.mSize = other.mDeviceCount = other.mManaged = 0;
128128
}
129129

130+
/// @brief Copy-constructor from a HostBuffer
/// @param buffer host buffer from which to copy data
/// @param device id of the device on which to initialize the buffer
///        (cudaCpuDeviceId presumably selects a host-side allocation — confirm against
///        the delegated constructor)
/// @param stream cuda stream used for the asynchronous host-to-device copy
/// @note The device copy uses cudaMemcpyAsync, so it is asynchronous with respect to
///       the host; synchronize @c stream before reading the device data.
DeviceBuffer(const HostBuffer& buffer, int device = cudaCpuDeviceId, cudaStream_t stream = 0)
    : DeviceBuffer(buffer.size(), device, stream)// delegate allocation of buffer.size() bytes
{
    if (mCpuData) {// the delegated constructor produced a host-side allocation
        cudaCheck(cudaMemcpy(mCpuData, buffer.data(), mSize, cudaMemcpyHostToHost));
    } else if (mGpuData[device]) {// allocation resides on the specified device
        cudaCheck(cudaMemcpyAsync(mGpuData[device], buffer.data(), mSize, cudaMemcpyHostToDevice, stream));
    }
}
143+
130144
/// @brief Destructor frees memory on both the host and device
131145
~DeviceBuffer() { this->clear(); };
132146

@@ -153,13 +167,20 @@ class DeviceBuffer
153167
/// @param list list of device IDs and device memory pointers
154168
static DeviceBuffer create(uint64_t size, void* cpuData, std::initializer_list<std::pair<int,void*>> list) {return DeviceBuffer(size, cpuData, list);}
155169

170+
/// @brief Static factory method that returns an instance of this buffer constructed from a HostBuffer
/// @param buffer host buffer from which to copy data
/// @param device id of the device on which to initialize the buffer
/// @param stream cuda stream
/// @return a DeviceBuffer containing a copy of @c buffer's data
static DeviceBuffer create(const HostBuffer& buffer, int device = cudaCpuDeviceId, cudaStream_t stream = 0) {return DeviceBuffer(buffer, device, stream);}
175+
156176
///////////////////////////////////////////////////////////////////////

/// @{
/// @brief Factory methods that create a shared pointer to a DeviceBuffer instance
// allocate a new buffer of @c size bytes (the DeviceBuffer* argument is ignored; it only disambiguates the overload)
static PtrT createPtr(uint64_t size, const DeviceBuffer* = nullptr, int device = cudaCpuDeviceId, cudaStream_t stream = 0) {return std::make_shared<DeviceBuffer>(size, device, stream);}
// adopt externally allocated host and device memory
static PtrT createPtr(uint64_t size, void* cpuData, void* gpuData) {return std::make_shared<DeviceBuffer>(size, cpuData, gpuData);}
// adopt externally allocated host memory and a list of (device id, device pointer) pairs
static PtrT createPtr(uint64_t size, void* cpuData, std::initializer_list<std::pair<int,void*>> list) {return std::make_shared<DeviceBuffer>(size, cpuData, list);}
// copy-construct from a HostBuffer (see the matching DeviceBuffer constructor)
static PtrT createPtr(const HostBuffer& buffer, int device = cudaCpuDeviceId, cudaStream_t stream = 0) {return std::make_shared<DeviceBuffer>(buffer, device, stream);}
/// @}
164185

165186
///////////////////////////////////////////////////////////////////////
@@ -179,7 +200,7 @@ class DeviceBuffer
179200
/// @brief Returns an offset pointer of a specific type from the allocated host memory
180201
/// @tparam T Type of the pointer returned
181202
/// @param count Numbers of elements of @c parameter type T to skip
182-
/// @warning assumes that this instance is not empty!
203+
/// @warning might return NULL
183204
template <typename T>
184205
T* data(ptrdiff_t count = 0, int device = cudaCpuDeviceId) const
185206
{

nanovdb/nanovdb/unittest/TestNanoVDB.cu

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,44 @@ void device2host(size_t count)
5555
float *array = reinterpret_cast<float*>(buffer.data());
5656
for (size_t i=0; i<count; ++i) EXPECT_EQ(array[i], float(i));
5757
}// device2host
58+
void host2device(size_t count)
59+
{
60+
const size_t size = count * sizeof(float);
61+
auto hostBuffer = nanovdb::HostBuffer(size);
62+
for (size_t i=0; i<count; ++i) *hostBuffer.data<float>(i) = float(i);
63+
64+
int dev;
65+
cudaError_t err = cudaGetDevice(&dev);
66+
if (err != cudaSuccess) printf("kernel cuda error: %d\n", (int)err);
67+
68+
auto devBuffer = nanovdb::cuda::DeviceBuffer::create(hostBuffer, dev);// on device only
69+
EXPECT_EQ(size, devBuffer.size());
70+
EXPECT_FALSE(devBuffer.data());
71+
EXPECT_TRUE(devBuffer.deviceData());
72+
float *d_array = reinterpret_cast<float*>(devBuffer.deviceData());
73+
constexpr unsigned int num_threads = 256;
74+
unsigned int num_blocks = num_blocks = (static_cast<unsigned int>(count) + num_threads - 1) / num_threads;
75+
76+
bool *test, *d_test;
77+
cudaCheck(cudaMallocHost((void**)&test, sizeof(bool)));
78+
cudaCheck(cudaMalloc((void**)&d_test, sizeof(bool)));
79+
*test = true;
80+
cudaCheck(cudaMemcpyAsync(d_test, test, sizeof(bool), cudaMemcpyHostToDevice));// on host only
81+
82+
nanovdb::util::cuda::lambdaKernel<<<num_blocks, num_threads>>>(count, [=] __device__ (size_t i) {
83+
if (d_array[i] != float(i)) *d_test = false;
84+
d_array[i] = float(i) + 1.0f;
85+
});
86+
cudaCheck(cudaMemcpy(test, d_test, sizeof(bool), cudaMemcpyDeviceToHost));
87+
EXPECT_TRUE(*test);
88+
cudaCheck(cudaFreeHost(test));
89+
cudaCheck(cudaFree(d_test));
90+
devBuffer.deviceDownload();// copy device -> host
91+
EXPECT_EQ(size, devBuffer.size());
92+
EXPECT_TRUE(devBuffer.data());
93+
EXPECT_TRUE(devBuffer.deviceData());
94+
for (size_t i=0; i<count; ++i) EXPECT_EQ(*hostBuffer.data<float>(i) + 1.0f, *devBuffer.data<float>(i));
95+
}// host2device
5896
// used for testing cuda::DeviceBuffer
5997
void host2device2host(size_t count)
6098
{
@@ -156,6 +194,7 @@ TEST(TestNanoVDBCUDA, CudaDeviceBuffer)
156194
EXPECT_FALSE(buffer.empty());
157195
}
158196
nanovdb::test::device2host(1000);
197+
nanovdb::test::host2device(1000);
159198
nanovdb::test::host2device2host(1000);
160199
}
161200

@@ -3594,4 +3633,40 @@ TEST(TestNanoVDBCUDA, VoxelBlockManager_ValueOnIndex)
35943633
cudaCheck(cudaFree(deviceJumpMap));
35953634
}// VoxelBlockManager_ValueOnIndex
35963635

3636+
TEST(TestNanoVDBCUDA, GridHandle_from_HostBuffer)
{
    using namespace nanovdb;
    using BufferT = nanovdb::cuda::DeviceBuffer;
    auto hostHandle = tools::createLevelSetSphere<float>(100, Vec3d(0),1,3, Vec3d(0), "test");

    int dev;
    cudaError_t err = cudaGetDevice(&dev);
    EXPECT_EQ(err, cudaSuccess);
    cudaStream_t stream;
    cudaCheck(cudaStreamCreate(&stream));

    {// longer version
        auto devBuffer = BufferT::create(hostHandle.buffer(), dev, stream);
        EXPECT_EQ(hostHandle.bufferSize(), devBuffer.size());
        auto devHandle = GridHandle<BufferT>(std::move(devBuffer));

        // testing
        EXPECT_EQ(hostHandle.bufferSize(), devHandle.bufferSize());
        EXPECT_EQ(devBuffer.size(), 0);// devBuffer was moved from, so it should now be empty
        devHandle.deviceDownload(stream);
        cudaCheck(cudaStreamSynchronize(stream));// fixed: make sure the async download finished before reading host data
        for (uint64_t i=0; i<hostHandle.bufferSize(); ++i) {
            EXPECT_EQ(*hostHandle.buffer().data<char>(i), *devHandle.buffer().data<char>(i));
        }
    }
    {// compact version
        auto devHandle = GridHandle<BufferT>(BufferT::create(hostHandle.buffer(), dev, stream));

        // testing
        EXPECT_EQ(hostHandle.bufferSize(), devHandle.bufferSize());
        devHandle.deviceDownload(stream);
        cudaCheck(cudaStreamSynchronize(stream));// fixed: see above
        for (uint64_t i=0; i<hostHandle.bufferSize(); ++i) {
            EXPECT_EQ(*hostHandle.buffer().data<char>(i), *devHandle.buffer().data<char>(i));
        }
    }
    cudaCheck(cudaStreamDestroy(stream));// fixed: the stream was previously leaked
}
35973672

0 commit comments

Comments
 (0)