@@ -83,6 +83,12 @@ OSL_NAMESPACE_ENTER
8383 } \
8484 }
8585
86+
87+ #define DEVICE_ALLOC (size ) reinterpret_cast <CUdeviceptr>(device_alloc(size))
88+ #define COPY_TO_DEVICE (dst_device, src_host, size ) \
89+ copy_to_device (reinterpret_cast <void *>(dst_device), src_host, size)
90+
91+
8692static void
8793context_log_cb(unsigned int level, const char * tag, const char * message,
8894 void * /* cbdata */ )
@@ -116,10 +122,10 @@ OptixRaytracer::~OptixRaytracer()
116122{
117123 if (m_optix_ctx)
118124 OPTIX_CHECK (optixDeviceContextDestroy (m_optix_ctx));
119- for (void * ptr : m_ptrs_to_free)
125+ for (CUdeviceptr ptr : m_ptrs_to_free)
120126 cudaFree (reinterpret_cast <void *>(ptr));
121- for (void * ptr : m_arrays_to_free)
122- cudaFreeArray (reinterpret_cast <cudaArray_t>(ptr) );
127+ for (cudaArray_t arr : m_arrays_to_free)
128+ cudaFreeArray (arr );
123129}
124130
125131
@@ -133,6 +139,7 @@ OptixRaytracer::device_alloc(size_t size)
133139 errhandler ().errorfmt (" cudaMalloc({}) failed with error: {}\n " , size,
134140 cudaGetErrorString (res));
135141 }
142+ m_ptrs_to_free.push_back (reinterpret_cast <CUdeviceptr>(ptr));
136143 return ptr;
137144}
138145
@@ -237,18 +244,13 @@ OptixRaytracer::synch_attributes()
237244 const size_t podDataSize = cpuDataSize
238245 - sizeof (ustringhash) * numStrings;
239246
240- CUDA_CHECK (
241- cudaMalloc (reinterpret_cast <void **>(&d_color_system),
242- podDataSize + sizeof (ustringhash_pod) * numStrings));
243- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_color_system), colorSys,
244- podDataSize, cudaMemcpyHostToDevice));
245- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_color_system));
247+ d_color_system = DEVICE_ALLOC (podDataSize
248+ + sizeof (ustringhash_pod) * numStrings);
249+ COPY_TO_DEVICE (d_color_system, colorSys, podDataSize);
246250
247- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_osl_printf_buffer),
248- OSL_PRINTF_BUFFER_SIZE));
251+ d_osl_printf_buffer = DEVICE_ALLOC (OSL_PRINTF_BUFFER_SIZE);
249252 CUDA_CHECK (cudaMemset (reinterpret_cast <void *>(d_osl_printf_buffer), 0 ,
250253 OSL_PRINTF_BUFFER_SIZE));
251- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_osl_printf_buffer));
252254
253255 // then copy the device string to the end, first strings starting at dataPtr - (numStrings)
254256 // FIXME -- Should probably handle alignment better.
@@ -260,8 +262,7 @@ OptixRaytracer::synch_attributes()
260262 for (const ustringhash* end = cpuStringHash + numStrings;
261263 cpuStringHash < end; ++cpuStringHash) {
262264 ustringhash_pod devStr = cpuStringHash->hash ();
263- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(gpuStrings), &devStr,
264- sizeof (devStr), cudaMemcpyHostToDevice));
265+ COPY_TO_DEVICE (gpuStrings, &devStr, sizeof (devStr));
265266 gpuStrings += sizeof (ustringhash_pod);
266267 }
267268 }
@@ -556,13 +557,10 @@ OptixRaytracer::create_shaders()
556557 }
557558
558559 // Upload per-material interactive buffer table
559- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_interactive_params),
560- sizeof (void *) * material_interactive_params.size ()));
561- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_interactive_params),
562- material_interactive_params.data (),
563- sizeof (void *) * material_interactive_params.size (),
564- cudaMemcpyHostToDevice));
565- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_interactive_params));
560+ d_interactive_params = DEVICE_ALLOC (sizeof (void *)
561+ * material_interactive_params.size ());
562+ COPY_TO_DEVICE (d_interactive_params, material_interactive_params.data (),
563+ sizeof (void *) * material_interactive_params.size ());
566564}
567565
568566
@@ -650,12 +648,8 @@ OptixRaytracer::create_sbt()
650648 GenericRecord raygen_record;
651649 CUdeviceptr d_raygen_record;
652650 OPTIX_CHECK (optixSbtRecordPackHeader (m_raygen_group, &raygen_record));
653- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_raygen_record),
654- sizeof (GenericRecord)));
655- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_raygen_record),
656- &raygen_record, sizeof (GenericRecord),
657- cudaMemcpyHostToDevice));
658- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_raygen_record));
651+ d_raygen_record = DEVICE_ALLOC (sizeof (GenericRecord));
652+ COPY_TO_DEVICE (d_raygen_record, &raygen_record, sizeof (GenericRecord));
659653
660654 m_optix_sbt.raygenRecord = d_raygen_record;
661655 }
@@ -665,12 +659,8 @@ OptixRaytracer::create_sbt()
665659 GenericRecord miss_record;
666660 CUdeviceptr d_miss_record;
667661 OPTIX_CHECK (optixSbtRecordPackHeader (m_miss_group, &miss_record));
668- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_miss_record),
669- sizeof (GenericRecord)));
670- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_miss_record),
671- &miss_record, sizeof (GenericRecord),
672- cudaMemcpyHostToDevice));
673- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_miss_record));
662+ d_miss_record = DEVICE_ALLOC (sizeof (GenericRecord));
663+ COPY_TO_DEVICE (d_miss_record, &miss_record, sizeof (GenericRecord));
674664
675665 m_optix_sbt.missRecordBase = d_miss_record;
676666 m_optix_sbt.missRecordStrideInBytes = sizeof (GenericRecord);
@@ -685,13 +675,9 @@ OptixRaytracer::create_sbt()
685675 OPTIX_CHECK (
686676 optixSbtRecordPackHeader (m_closesthit_group, &hitgroup_records[0 ]));
687677
688- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_hitgroup_records),
689- nhitgroups * sizeof (GenericRecord)));
690- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_hitgroup_records),
691- &hitgroup_records[0 ],
692- nhitgroups * sizeof (GenericRecord),
693- cudaMemcpyHostToDevice));
694- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_hitgroup_records));
678+ d_hitgroup_records = DEVICE_ALLOC (nhitgroups * sizeof (GenericRecord));
679+ COPY_TO_DEVICE (d_hitgroup_records, &hitgroup_records[0 ],
680+ nhitgroups * sizeof (GenericRecord));
695681
696682 m_optix_sbt.hitgroupRecordBase = d_hitgroup_records;
697683 m_optix_sbt.hitgroupRecordStrideInBytes = sizeof (GenericRecord);
@@ -709,13 +695,9 @@ OptixRaytracer::create_sbt()
709695 &callable_records[idx]));
710696 }
711697
712- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_callable_records),
713- (nshaders) * sizeof (GenericRecord)));
714- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_callable_records),
715- callable_records.data (),
716- (nshaders) * sizeof (GenericRecord),
717- cudaMemcpyHostToDevice));
718- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_callable_records));
698+ d_callable_records = DEVICE_ALLOC ((nshaders) * sizeof (GenericRecord));
699+ COPY_TO_DEVICE (d_callable_records, callable_records.data (),
700+ (nshaders) * sizeof (GenericRecord));
719701
720702 m_optix_sbt.callablesRecordBase = d_callable_records;
721703 m_optix_sbt.callablesRecordStrideInBytes = sizeof (GenericRecord);
@@ -733,14 +715,9 @@ OptixRaytracer::create_sbt()
733715 CUdeviceptr d_setglobals_raygen_record;
734716 OPTIX_CHECK (
735717 optixSbtRecordPackHeader (m_setglobals_raygen_group, &record));
736- CUDA_CHECK (
737- cudaMalloc (reinterpret_cast <void **>(&d_setglobals_raygen_record),
738- sizeof (GenericRecord)));
739- CUDA_CHECK (
740- cudaMemcpy (reinterpret_cast <void *>(d_setglobals_raygen_record),
741- &record, sizeof (GenericRecord), cudaMemcpyHostToDevice));
742- m_ptrs_to_free.push_back (
743- reinterpret_cast <void *>(d_setglobals_raygen_record));
718+ d_setglobals_raygen_record = DEVICE_ALLOC (sizeof (GenericRecord));
719+ COPY_TO_DEVICE (d_setglobals_raygen_record, &record,
720+ sizeof (GenericRecord));
744721
745722 m_setglobals_optix_sbt.raygenRecord = d_setglobals_raygen_record;
746723 }
@@ -750,14 +727,9 @@ OptixRaytracer::create_sbt()
750727 GenericRecord record;
751728 CUdeviceptr d_setglobals_miss_record;
752729 OPTIX_CHECK (optixSbtRecordPackHeader (m_setglobals_miss_group, &record));
753- CUDA_CHECK (
754- cudaMalloc (reinterpret_cast <void **>(&d_setglobals_miss_record),
755- sizeof (GenericRecord)));
756- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_setglobals_miss_record),
757- &record, sizeof (GenericRecord),
758- cudaMemcpyHostToDevice));
759- m_ptrs_to_free.push_back (
760- reinterpret_cast <void *>(d_setglobals_miss_record));
730+ d_setglobals_miss_record = DEVICE_ALLOC (sizeof (GenericRecord));
731+ COPY_TO_DEVICE (d_setglobals_miss_record, &record,
732+ sizeof (GenericRecord));
761733
762734 m_setglobals_optix_sbt.missRecordBase = d_setglobals_miss_record;
763735 m_setglobals_optix_sbt.missRecordStrideInBytes = sizeof (GenericRecord);
@@ -797,20 +769,12 @@ OptixRaytracer::build_accel()
797769 accel_options.operation = OPTIX_BUILD_OPERATION_BUILD;
798770
799771 const size_t vertices_size = sizeof (Vec3) * scene.verts .size ();
800- CUDA_CHECK (
801- cudaMalloc (reinterpret_cast <void **>(&d_vertices), vertices_size));
802- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_vertices),
803- scene.verts .data (), vertices_size,
804- cudaMemcpyHostToDevice));
805- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_vertices));
772+ d_vertices = DEVICE_ALLOC (vertices_size);
773+ COPY_TO_DEVICE (d_vertices, scene.verts .data (), vertices_size);
806774
807775 const size_t indices_size = scene.triangles .size () * sizeof (int32_t ) * 3 ;
808- CUDA_CHECK (
809- cudaMalloc (reinterpret_cast <void **>(&d_vert_indices), indices_size));
810- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_vert_indices),
811- scene.triangles .data (), indices_size,
812- cudaMemcpyHostToDevice));
813- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_vert_indices));
776+ d_vert_indices = DEVICE_ALLOC (indices_size);
777+ COPY_TO_DEVICE (d_vert_indices, scene.triangles .data (), indices_size);
814778
815779 const uint32_t triangle_input_flags[1 ] = { OPTIX_GEOMETRY_FLAG_NONE };
816780 OptixBuildInput triangle_input = {};
@@ -836,9 +800,7 @@ OptixRaytracer::build_accel()
836800 CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_temp_buffer),
837801 gas_buffer_sizes.tempSizeInBytes ));
838802
839- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_accel_output_buffer),
840- gas_buffer_sizes.outputSizeInBytes ));
841- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_accel_output_buffer));
803+ d_accel_output_buffer = DEVICE_ALLOC (gas_buffer_sizes.outputSizeInBytes );
842804
843805 OPTIX_CHECK (optixAccelBuild (
844806 m_optix_ctx, 0 , &accel_options, &triangle_input, 1 , d_temp_buffer,
@@ -855,16 +817,9 @@ OptixRaytracer::prepare_background()
855817{
856818 if (getBackgroundShaderID () >= 0 ) {
857819 const int bg_res = std::max<int >(32 , getBackgroundResolution ());
858- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_bg_values),
859- 3 * sizeof (float ) * bg_res * bg_res));
860- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_bg_rows),
861- sizeof (float ) * bg_res));
862- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_bg_cols),
863- sizeof (float ) * bg_res * bg_res));
864-
865- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_bg_values));
866- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_bg_rows));
867- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_bg_cols));
820+ d_bg_values = DEVICE_ALLOC (3 * sizeof (float ) * bg_res * bg_res);
821+ d_bg_rows = DEVICE_ALLOC (sizeof (float ) * bg_res);
822+ d_bg_cols = DEVICE_ALLOC (sizeof (float ) * bg_res * bg_res);
868823 }
869824}
870825
@@ -875,69 +830,46 @@ OptixRaytracer::upload_mesh_data()
875830{
876831 // Upload the extra geometry data to the device
877832 const size_t uvs_size = sizeof (Vec2) * scene.uvs .size ();
878- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_uvs), uvs_size));
879- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_uvs), scene.uvs .data (),
880- uvs_size, cudaMemcpyHostToDevice));
881- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_uvs));
833+ d_uvs = DEVICE_ALLOC (uvs_size);
834+ COPY_TO_DEVICE (d_uvs, scene.uvs .data (), uvs_size);
882835
883836 const size_t uv_indices_size = scene.uv_triangles .size () * sizeof (int32_t )
884837 * 3 ;
885- CUDA_CHECK (
886- cudaMalloc (reinterpret_cast <void **>(&d_uv_indices), uv_indices_size));
887- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_uv_indices),
888- scene.uv_triangles .data (), uv_indices_size,
889- cudaMemcpyHostToDevice));
890- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_uv_indices));
838+ d_uv_indices = DEVICE_ALLOC (uv_indices_size);
839+ COPY_TO_DEVICE (d_uv_indices, scene.uv_triangles .data (), uv_indices_size);
891840
892841 const size_t normals_size = sizeof (Vec3) * scene.normals .size ();
893842 if (normals_size > 0 ) {
894- CUDA_CHECK (
895- cudaMalloc (reinterpret_cast <void **>(&d_normals), normals_size));
896- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_normals),
897- scene.normals .data (), normals_size,
898- cudaMemcpyHostToDevice));
899- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_normals));
843+ d_normals = DEVICE_ALLOC (normals_size);
844+ COPY_TO_DEVICE (d_normals, scene.normals .data (), normals_size);
900845 }
901846
902847 const size_t normal_indices_size = scene.n_triangles .size ()
903848 * sizeof (int32_t ) * 3 ;
904- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_normal_indices),
905- normal_indices_size));
906- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_normal_indices),
907- scene.n_triangles .data (), normal_indices_size,
908- cudaMemcpyHostToDevice));
909- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_normal_indices));
849+ d_normal_indices = DEVICE_ALLOC (normal_indices_size);
850+ COPY_TO_DEVICE (d_normal_indices, scene.n_triangles .data (),
851+ normal_indices_size);
910852
911853 const size_t shader_ids_size = scene.shaderids .size () * sizeof (int );
912- CUDA_CHECK (
913- cudaMalloc (reinterpret_cast <void **>(&d_shader_ids), shader_ids_size));
914- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_shader_ids),
915- scene.shaderids .data (), shader_ids_size,
916- cudaMemcpyHostToDevice));
917- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_shader_ids));
854+ d_shader_ids = DEVICE_ALLOC (shader_ids_size);
855+ COPY_TO_DEVICE (d_shader_ids, scene.shaderids .data (), shader_ids_size);
918856
919857 // TODO: These could be packed, but for now just use ints instead of bools
920858 std::vector<int32_t > shader_is_light;
921859 for (const bool & is_light : OptixRaytracer::shader_is_light ())
922860 shader_is_light.push_back (is_light);
923861 const size_t shader_is_light_size = shader_is_light.size ()
924862 * sizeof (int32_t );
925- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_shader_is_light),
926- shader_is_light_size));
927- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_shader_is_light),
928- shader_is_light.data (), shader_is_light_size,
929- cudaMemcpyHostToDevice));
930- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_shader_is_light));
863+ d_shader_is_light = DEVICE_ALLOC (shader_is_light_size);
864+ COPY_TO_DEVICE (d_shader_is_light, shader_is_light.data (),
865+ shader_is_light_size);
931866
932867
933868 const size_t lightprims_size = OptixRaytracer::lightprims ().size ()
934869 * sizeof (uint32_t );
935- CUDA_CHECK (
936- cudaMalloc (reinterpret_cast <void **>(&d_lightprims), lightprims_size));
937- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_lightprims),
938- OptixRaytracer::lightprims ().data (), lightprims_size,
939- cudaMemcpyHostToDevice));
940- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_lightprims));
870+ d_lightprims = DEVICE_ALLOC (lightprims_size);
871+ COPY_TO_DEVICE (d_lightprims, OptixRaytracer::lightprims ().data (),
872+ lightprims_size);
941873
942874 // Copy the mesh ID for each triangle to the device
943875 std::vector<int > mesh_ids;
@@ -948,11 +880,8 @@ OptixRaytracer::upload_mesh_data()
948880 mesh_ids.push_back (meshid);
949881 }
950882 const size_t mesh_ids_size = mesh_ids.size () * sizeof (int32_t );
951- CUDA_CHECK (
952- cudaMalloc (reinterpret_cast <void **>(&d_mesh_ids), mesh_ids_size));
953- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_mesh_ids), mesh_ids.data (),
954- mesh_ids_size, cudaMemcpyHostToDevice));
955- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_mesh_ids));
883+ d_mesh_ids = DEVICE_ALLOC (mesh_ids_size);
884+ COPY_TO_DEVICE (d_mesh_ids, mesh_ids.data (), mesh_ids_size);
956885
957886 // Copy the mesh surface areas to the device
958887 std::vector<float > mesh_surfacearea;
@@ -971,12 +900,9 @@ OptixRaytracer::upload_mesh_data()
971900
972901 const size_t mesh_surfacearea_size = mesh_surfacearea.size ()
973902 * sizeof (float );
974- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_surfacearea),
975- mesh_surfacearea_size));
976- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_surfacearea),
977- mesh_surfacearea.data (), mesh_surfacearea_size,
978- cudaMemcpyHostToDevice));
979- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_surfacearea));
903+ d_surfacearea = DEVICE_ALLOC (mesh_surfacearea_size);
904+ COPY_TO_DEVICE (d_surfacearea, mesh_surfacearea.data (),
905+ mesh_surfacearea_size);
980906}
981907
982908
@@ -1061,7 +987,7 @@ OptixRaytracer::get_texture_handle(ustring filename,
1061987 CUDA_CHECK (cudaMemcpy2DToArray (pixelArray, 0 , 0 , level_pixels[0 ].data (),
1062988 pitch, pitch, img_height,
1063989 cudaMemcpyHostToDevice));
1064- m_arrays_to_free.push_back (reinterpret_cast < void *>( pixelArray) );
990+ m_arrays_to_free.push_back (pixelArray);
1065991
1066992 cudaResourceDesc res_desc = {};
1067993 res_desc.resType = cudaResourceTypeMipmappedArray;
@@ -1123,12 +1049,8 @@ OptixRaytracer::warmup()
11231049void
11241050OptixRaytracer::render (int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED)
11251051{
1126- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_output_buffer),
1127- xres * yres * 4 * sizeof (float )));
1128- CUDA_CHECK (cudaMalloc (reinterpret_cast <void **>(&d_launch_params),
1129- sizeof (RenderParams)));
1130- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_output_buffer));
1131- m_ptrs_to_free.push_back (reinterpret_cast <void *>(d_launch_params));
1052+ d_output_buffer = DEVICE_ALLOC (xres * yres * 4 * sizeof (float ));
1053+ d_launch_params = DEVICE_ALLOC (sizeof (RenderParams));
11321054
11331055 m_xres = xres;
11341056 m_yres = yres;
@@ -1186,8 +1108,7 @@ OptixRaytracer::render(int xres OSL_MAYBE_UNUSED, int yres OSL_MAYBE_UNUSED)
11861108 params.bg_rows = d_bg_rows;
11871109 params.bg_cols = d_bg_cols;
11881110
1189- CUDA_CHECK (cudaMemcpy (reinterpret_cast <void *>(d_launch_params), ¶ms,
1190- sizeof (RenderParams), cudaMemcpyHostToDevice));
1111+ COPY_TO_DEVICE (d_launch_params, ¶ms, sizeof (RenderParams));
11911112
11921113 // Set up global variables
11931114 OPTIX_CHECK (optixLaunch (m_optix_pipeline, m_cuda_stream, d_launch_params,
0 commit comments