@@ -55,6 +55,44 @@ void device2host(size_t count)
5555 float *array = reinterpret_cast <float *>(buffer.data ());
5656 for (size_t i=0 ; i<count; ++i) EXPECT_EQ (array[i], float (i));
5757}// device2host
// Fills a HostBuffer on the CPU, uploads it to a device-only DeviceBuffer,
// verifies and increments every element in a kernel, then downloads and
// compares against the host copy. Companion to device2host/host2device2host.
void host2device(size_t count)
{
    const size_t size = count * sizeof(float);
    auto hostBuffer = nanovdb::HostBuffer(size);
    for (size_t i = 0; i < count; ++i) *hostBuffer.data<float>(i) = float(i);

    int dev;
    cudaError_t err = cudaGetDevice(&dev);
    EXPECT_EQ(err, cudaSuccess);// was: unchecked, only printf'd on failure

    auto devBuffer = nanovdb::cuda::DeviceBuffer::create(hostBuffer, dev);// on device only
    EXPECT_EQ(size, devBuffer.size());
    EXPECT_FALSE(devBuffer.data());// no host-side copy yet
    EXPECT_TRUE(devBuffer.deviceData());
    float *d_array = reinterpret_cast<float*>(devBuffer.deviceData());
    constexpr unsigned int num_threads = 256;
    // ceil-div launch config; fixes the duplicated "num_blocks = num_blocks =" assignment
    const unsigned int num_blocks = (static_cast<unsigned int>(count) + num_threads - 1) / num_threads;

    bool *test, *d_test;
    cudaCheck(cudaMallocHost((void**)&test, sizeof(bool)));// pinned host flag
    cudaCheck(cudaMalloc((void**)&d_test, sizeof(bool)));// device flag
    *test = true;
    cudaCheck(cudaMemcpyAsync(d_test, test, sizeof(bool), cudaMemcpyHostToDevice));// host -> device

    // Each thread checks its element still holds float(i), then bumps it by one.
    nanovdb::util::cuda::lambdaKernel<<<num_blocks, num_threads>>>(count, [=] __device__ (size_t i) {
        if (d_array[i] != float(i)) *d_test = false;
        d_array[i] = float(i) + 1.0f;
    });
    cudaCheck(cudaGetLastError());// catch launch-configuration errors
    // blocking copy also synchronizes with the kernel above
    cudaCheck(cudaMemcpy(test, d_test, sizeof(bool), cudaMemcpyDeviceToHost));
    EXPECT_TRUE(*test);
    cudaCheck(cudaFreeHost(test));
    cudaCheck(cudaFree(d_test));
    devBuffer.deviceDownload();// copy device -> host
    EXPECT_EQ(size, devBuffer.size());
    EXPECT_TRUE(devBuffer.data());// host copy now exists
    EXPECT_TRUE(devBuffer.deviceData());
    for (size_t i = 0; i < count; ++i) EXPECT_EQ(*hostBuffer.data<float>(i) + 1.0f, *devBuffer.data<float>(i));
}// host2device
5896// used for testing cuda::DeviceBuffer
5997void host2device2host (size_t count)
6098{
@@ -156,6 +194,7 @@ TEST(TestNanoVDBCUDA, CudaDeviceBuffer)
156194 EXPECT_FALSE (buffer.empty ());
157195 }
158196 nanovdb::test::device2host (1000 );
197+ nanovdb::test::host2device (1000 );
159198 nanovdb::test::host2device2host (1000 );
160199}
161200
@@ -3594,4 +3633,40 @@ TEST(TestNanoVDBCUDA, VoxelBlockManager_ValueOnIndex)
35943633 cudaCheck (cudaFree (deviceJumpMap));
35953634}// VoxelBlockManager_ValueOnIndex
35963635
// Builds a level-set sphere on the host, uploads it into a DeviceBuffer on a
// non-default stream, wraps it in a GridHandle, and checks a byte-for-byte
// round trip after deviceDownload — once step-by-step, once in compact form.
TEST(TestNanoVDBCUDA, GridHandle_from_HostBuffer)
{
    using namespace nanovdb;
    using BufferT = nanovdb::cuda::DeviceBuffer;
    auto hostHandle = tools::createLevelSetSphere<float>(100, Vec3d(0), 1, 3, Vec3d(0), "test");

    int dev;
    cudaError_t err = cudaGetDevice(&dev);
    EXPECT_EQ(err, cudaSuccess);
    cudaStream_t stream;
    cudaCheck(cudaStreamCreate(&stream));

    {// longer version
        auto devBuffer = BufferT::create(hostHandle.buffer(), dev, stream);
        EXPECT_EQ(hostHandle.bufferSize(), devBuffer.size());
        auto devHandle = GridHandle<BufferT>(std::move(devBuffer));

        // testing
        EXPECT_EQ(hostHandle.bufferSize(), devHandle.bufferSize());
        EXPECT_EQ(devBuffer.size(), 0);// deliberate: a moved-from buffer must be empty
        devHandle.deviceDownload(stream);
        // NOTE(review): make the stream-ordered download visible before host reads
        // (harmless if deviceDownload already synchronizes — TODO confirm)
        cudaCheck(cudaStreamSynchronize(stream));
        for (uint64_t i = 0; i < hostHandle.bufferSize(); ++i) {
            EXPECT_EQ(*hostHandle.buffer().data<char>(i), *devHandle.buffer().data<char>(i));
        }
    }
    {// compact version
        auto devHandle = GridHandle<BufferT>(BufferT::create(hostHandle.buffer(), dev, stream));

        // testing
        EXPECT_EQ(hostHandle.bufferSize(), devHandle.bufferSize());
        devHandle.deviceDownload(stream);
        cudaCheck(cudaStreamSynchronize(stream));// see note above in this test
        for (uint64_t i = 0; i < hostHandle.bufferSize(); ++i) {
            EXPECT_EQ(*hostHandle.buffer().data<char>(i), *devHandle.buffer().data<char>(i));
        }
    }
    cudaCheck(cudaStreamDestroy(stream));// fixed: stream was created but never destroyed
}
35973672
0 commit comments