diff --git a/doc/classes/RDShaderSPIRV.xml b/doc/classes/RDShaderSPIRV.xml index f3de94072f95..b71c227ba523 100644 --- a/doc/classes/RDShaderSPIRV.xml +++ b/doc/classes/RDShaderSPIRV.xml @@ -42,12 +42,27 @@ + + The SPIR-V bytecode for the any hit shader stage. + + + The SPIR-V bytecode for the closest hit shader stage. + The SPIR-V bytecode for the compute shader stage. The SPIR-V bytecode for the fragment shader stage. + + The SPIR-V bytecode for the intersection shader stage. + + + The SPIR-V bytecode for the miss shader stage. + + + The SPIR-V bytecode for the ray generation shader stage. + The SPIR-V bytecode for the tessellation control shader stage. @@ -57,12 +72,27 @@ The SPIR-V bytecode for the vertex shader stage. + + The compilation error message for the any hit shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + + + The compilation error message for the closest hit shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + The compilation error message for the compute shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. The compilation error message for the fragment shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + + The compilation error message for the intersection shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + + + The compilation error message for the miss shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + + + The compilation error message for the ray generation shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. + The compilation error message for the tessellation control shader stage (set by the SPIR-V compiler and Godot). If empty, shader compilation was successful. 
diff --git a/doc/classes/RDShaderSource.xml b/doc/classes/RDShaderSource.xml index a7b897d56e69..1b8d5fd7af78 100644 --- a/doc/classes/RDShaderSource.xml +++ b/doc/classes/RDShaderSource.xml @@ -31,12 +31,27 @@ The language the shader is written in. + + Source code for the shader's any hit stage. + + + Source code for the shader's closest hit stage. + Source code for the shader's compute stage. Source code for the shader's fragment stage. + + Source code for the shader's intersection stage. + + + Source code for the shader's miss stage. + + + Source code for the shader's ray generation stage. + Source code for the shader's tessellation control stage. diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 66dbd087bf44..356746d8c5e0 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -14,6 +14,13 @@ $DOCS_URL/tutorials/shaders/compute_shaders.html + + + + + Builds the [param acceleration_structure]. + + @@ -22,6 +29,18 @@ This method does nothing. + + + + + + + + Creates a new Bottom Level Acceleration Structure. It can be accessed with the RID that is returned. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + [param position_attribute_location] selects which vertex attribute location supplies the position data (default is 0). + + @@ -732,6 +751,102 @@ Limits for various graphics hardware can be found in the [url=https://vulkan.gpuinfo.org/]Vulkan Hardware Database[/url]. + + + + Starts a list of raytracing commands. The returned value should be passed to other [code]raytracing_list_*[/code] functions. + Multiple raytracing lists cannot be created at the same time; you must finish the previous raytracing list first using [method raytracing_list_end]. 
+ A simple raytracing operation might look like this (code is not a complete example): + [codeblocks] + [gdscript] + var rd = RenderingDevice.new() + assert(rd.has_feature(RenderingDevice.SUPPORTS_RAYTRACING_PIPELINE)) + + # Create a BLAS for a mesh. + blas = rd.blas_create(vertex_array, index_array, RenderingDevice.ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE) + # Create a TLAS from the BLASes. + instances_buffer = rd.tlas_instances_buffer_create(1) + rd.tlas_instances_buffer_fill(instances_buffer, [blas], [Transform3D()]) + tlas = rd.tlas_create(instances_buffer) + + # Build acceleration structures. + rd.acceleration_structure_build(blas) + rd.acceleration_structure_build(tlas) + + var raylist = rd.raytracing_list_begin() + + # Bind pipeline and uniforms. + rd.raytracing_list_bind_raytracing_pipeline(raylist, raytracing_pipeline) + rd.raytracing_list_bind_uniform_set(raylist, uniform_set, 0) + + # Trace rays. + var width = get_viewport().size.x + var height = get_viewport().size.y + rd.raytracing_list_trace_rays(raylist, width, height) + + rd.raytracing_list_end() + [/gdscript] + [/codeblocks] + + + + + + + + Binds [param raytracing_pipeline] to the specified [param raytracing_list]. + + + + + + + + + Binds the [param uniform_set] to this [param raytracing_list]. + + + + + + Finishes a list of raytracing commands created with the [code]raytracing_list_*[/code] methods. + + + + + + + + + Sets the push constant data to [param buffer] for the specified [param raytracing_list]. The shader determines how this binary data is used. The buffer's size in bytes must also be specified in [param size_bytes] (this can be obtained by calling the [method PackedByteArray.size] method on the passed [param buffer]). + + + + + + + + + Initializes a ray tracing dispatch for the specified [param raytracing_list], assembling a group of [param width] x [param height] rays. + + + + + + + + Creates a new raytracing pipeline. It can be accessed with the RID that is returned. 
+ Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + [b]Note:[/b] Recursive raytracing is not permitted. + + + + + + + Returns [code]true[/code] if the raytracing pipeline specified by the [param raytracing_pipeline] RID is valid, [code]false[/code] otherwise. + + @@ -1089,6 +1204,33 @@ [b]Note:[/b] The existing [param texture] requires the [constant TEXTURE_USAGE_CAN_UPDATE_BIT] to be updatable. + + + + + Creates a new Top Level Acceleration Structure. It can be accessed with the RID that is returned. + The instances buffer passed as input is expected to be filled before building the TLAS. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + + + + + + + + Creates a new instances buffer which can be used to create a TLAS. It can be accessed with the RID that is returned. + Once finished with your RID, you will want to free the RID using the RenderingDevice's [method free_rid] method. + + + + + + + + + Fills the content of an instances buffer. The number of BLASes and transforms passed as input should be the same and should equal the instance count used at instances buffer creation time. + + @@ -2145,6 +2287,23 @@ Set this flag so that it is created as storage. This is useful if Compute Shaders need access (for reading or writing) to the buffer, e.g. skeletal animations are processed in Compute Shaders which need access to vertex buffers, to be later consumed by vertex shaders as part of the regular rasterization pipeline. + + Allows usage of this buffer as input data for an acceleration structure build operation. 
You must first check that the GPU supports it: + [codeblocks] + [gdscript] + rd = RenderingServer.get_rendering_device() + + if rd.has_feature(RenderingDevice.SUPPORTS_RAYTRACING_PIPELINE): + storage_buffer = rd.storage_buffer_create(bytes.size(), bytes, RenderingDevice.BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT) + [/gdscript] + [/codeblocks] + + + An opaque geometry does not invoke the any hit shaders. + + + This geometry only calls the any hit shader a single time for each primitive. + Sampler uniform. @@ -2185,7 +2344,10 @@ [b]Note:[/b] This flag is not available to GD users due to being too dangerous (i.e. wrong usage can result in visual glitches). It's exposed in case GD users receive a buffer created with such flag from Godot. - + + Acceleration structure uniform. + + Represents the size of the [enum UniformType] enum. @@ -2493,7 +2655,22 @@ Compute shader stage. This can be used to run arbitrary computing tasks in a shader, performing them on the GPU instead of the CPU. - + + Ray generation shader stage. This can be used to generate primary rays. + + + Any hit shader stage. Invoked when ray intersections are not opaque. This can be used to specify what happens when a ray hits any of the geometry in the scene. + + + Closest hit shader stage. This can be used to specify what happens when a ray hits the closest geometry in the scene. + + + Miss shader stage. This can be used to specify what happens if a ray does not hit anything in the scene. + + + Intersection shader stage. The intersection shader for triangles is built-in. This can be used to compute ray intersections with primitives that are not triangles. + + Represents the size of the [enum ShaderStage] enum. @@ -2511,6 +2688,21 @@ Compute shader stage bit (see also [constant SHADER_STAGE_COMPUTE]). + + Ray generation shader stage bit (see also [constant SHADER_STAGE_RAYGEN]). + + + Any hit shader stage bit (see also [constant SHADER_STAGE_ANY_HIT]). 
+ + + Closest hit shader stage bit (see also [constant SHADER_STAGE_CLOSEST_HIT]). + + + Miss shader stage bit (see also [constant SHADER_STAGE_MISS]). + + + Intersection shader stage bit (see also [constant SHADER_STAGE_INTERSECTION]). + Khronos' GLSL shading language (used natively by OpenGL and Vulkan). This is the language used for core Godot shaders. @@ -2538,6 +2730,16 @@ Support for 32-bit image atomic operations. + + Support for ray query extension. + + [b]Note:[/b] This is currently only supported when using Vulkan. This is not supported on macOS and iOS (even on hardware supporting raytracing) due to MoltenVK limitations. + + + Support for raytracing pipeline extension. + + [b]Note:[/b] This is currently only supported when using Vulkan. This is not supported on macOS and iOS (even on hardware supporting raytracing) due to MoltenVK limitations. + Maximum number of uniform sets that can be bound at a given time. diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 6587bce26b53..384e74829ce2 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -2284,7 +2284,8 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_ BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, - VectorView p_texture_barriers) { + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) { if (!barrier_capabilities.enhanced_barriers_supported) { // Enhanced barriers are a requirement for this function. 
return; @@ -3244,7 +3245,7 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_container(const Ref shader_info_in.spirv_specialization_constants_ids_mask = shader_refl_d3d12.spirv_specialization_constants_ids_mask; shader_info_in.nir_runtime_data_root_param_idx = shader_refl_d3d12.nir_runtime_data_root_param_idx; - shader_info_in.is_compute = shader_refl.is_compute; + shader_info_in.pipeline_type = shader_refl.pipeline_type; shader_info_in.sets.resize(shader_refl.uniform_sets.size()); for (uint32_t i = 0; i < shader_info_in.sets.size(); i++) { @@ -4178,10 +4179,14 @@ void RenderingDeviceDriverD3D12::command_bind_push_constants(CommandBufferID p_c if (!shader_info_in->dxil_push_constant_size) { return; } - if (shader_info_in->is_compute) { + if (shader_info_in->pipeline_type == PIPELINE_TYPE_COMPUTE) { cmd_buf_info->cmd_list->SetComputeRoot32BitConstants(0, p_data.size(), p_data.ptr(), p_dst_first_index); - } else { + } else if (shader_info_in->pipeline_type == PIPELINE_TYPE_RASTERIZATION) { cmd_buf_info->cmd_list->SetGraphicsRoot32BitConstants(0, p_data.size(), p_data.ptr(), p_dst_first_index); + } else if (shader_info_in->pipeline_type == PIPELINE_TYPE_RAYTRACING) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); + } else { + ERR_FAIL_MSG("This pipeline type is not currently supported by the D3D12 driver."); } } @@ -4453,7 +4458,7 @@ void RenderingDeviceDriverD3D12::_render_pass_enhanced_barriers_flush(CommandBuf } if (!texture_barriers.is_empty()) { - command_pipeline_barrier(p_cmd_buffer, src_stages, dst_stages, VectorView(), VectorView(), texture_barriers); + command_pipeline_barrier(p_cmd_buffer, src_stages, dst_stages, VectorView(), VectorView(), texture_barriers, VectorView()); } } @@ -5457,6 +5462,64 @@ RDD::PipelineID RenderingDeviceDriverD3D12::compute_pipeline_create(ShaderID p_s return PipelineID(pipeline_info); } +/********************/ +/**** RAYTRACING ****/ +/********************/ + +// ---- ACCELERATION 
STRUCTURES ---- + +RDD::AccelerationStructureID RenderingDeviceDriverD3D12::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BitField p_geometry_bits) { + ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the D3D12 driver."); +} + +uint32_t RenderingDeviceDriverD3D12::tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) { + ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the D3D12 driver."); +} + +void RenderingDeviceDriverD3D12::tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + +RDD::AccelerationStructureID RenderingDeviceDriverD3D12::tlas_create(BufferID p_instance_buffer) { + ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the D3D12 driver."); +} + +void RenderingDeviceDriverD3D12::acceleration_structure_free(AccelerationStructureID p_acceleration_structure) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + +uint32_t RenderingDeviceDriverD3D12::acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) { + ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the D3D12 driver."); +} + +// ----- PIPELINE ----- + +RDD::RaytracingPipelineID RenderingDeviceDriverD3D12::raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) { + ERR_FAIL_V_MSG(RaytracingPipelineID(), "Ray tracing is not currently supported by the D3D12 driver."); +} + +void RenderingDeviceDriverD3D12::raytracing_pipeline_free(RDD::RaytracingPipelineID p_pipeline) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + 
+// ----- COMMANDS ----- + +void RenderingDeviceDriverD3D12::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + +void RenderingDeviceDriverD3D12::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + +void RenderingDeviceDriverD3D12::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + +void RenderingDeviceDriverD3D12::command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the D3D12 driver."); +} + /*****************/ /**** QUERIES ****/ /*****************/ diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index d88ebb7f41c7..17556732f2c3 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -389,7 +389,8 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, - VectorView p_texture_barriers) override final; + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) override final; private: /****************/ @@ -597,7 +598,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { struct ShaderInfo { uint32_t dxil_push_constant_size = 0; uint32_t nir_runtime_data_root_param_idx = UINT32_MAX; - bool is_compute = false; + PipelineType pipeline_type = PIPELINE_TYPE_RASTERIZATION; struct UniformBindingInfo { uint32_t stages = 0; // Actual shader stages using the uniform 
(0 if totally optimized out). @@ -833,6 +834,31 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + /********************/ + /**** RAYTRACING ****/ + /********************/ + + // ---- ACCELERATION STRUCTURES ---- + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BitField p_geometry_bits) override final; + virtual uint32_t tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) override final; + virtual void tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) override final; + virtual AccelerationStructureID tlas_create(BufferID p_instances_buffer) override final; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final; + virtual uint32_t acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) override final; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) override final; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) 
override final; + virtual void command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) override final; + /*****************/ /**** QUERIES ****/ /*****************/ diff --git a/drivers/d3d12/rendering_shader_container_d3d12.cpp b/drivers/d3d12/rendering_shader_container_d3d12.cpp index 21a24f952db0..75de5c6d5b2f 100644 --- a/drivers/d3d12/rendering_shader_container_d3d12.cpp +++ b/drivers/d3d12/rendering_shader_container_d3d12.cpp @@ -562,7 +562,7 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, - VectorView p_texture_barriers) override final; + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) override final; #pragma mark - Fences @@ -442,6 +443,29 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; +#pragma mark - Raytracing + + // ----- ACCELERATION STRUCTURE ----- + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BitField p_geometry_bits) override final; + virtual uint32_t tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) override final; + virtual void tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) override final; + virtual AccelerationStructureID tlas_create(BufferID p_instances_buffer) override final; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final; + virtual uint32_t acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) override 
final; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) override final; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final; + virtual void command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) override final; + #pragma mark - Queries // ----- TIMESTAMP ----- diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 2b5dff7434f1..7840bad20a51 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -845,7 +845,8 @@ static const API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MTLSamplerBorderC BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, - VectorView p_texture_barriers) { + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) { WARN_PRINT_ONCE("not implemented"); } @@ -1173,14 +1174,14 @@ static void update_uniform_info(const RenderingShaderContainerMetal::UniformData HashMap libraries; - bool is_compute = false; + PipelineType pipeline_type = PIPELINE_TYPE_RASTERIZATION; Vector decompressed_code; for (uint32_t shader_index = 0; shader_index < shaders.size(); shader_index++) { const RenderingShaderContainer::Shader &shader = shaders[shader_index]; const RSCM::StageData &shader_data = mtl_shaders[shader_index]; if (shader.shader_stage == 
RD::ShaderStage::SHADER_STAGE_COMPUTE) { - is_compute = true; + pipeline_type = PIPELINE_TYPE_COMPUTE; } if (ShaderCacheEntry **p = _shader_cache.getptr(shader_data.hash); p != nullptr) { @@ -1292,7 +1293,7 @@ static void update_uniform_info(const RenderingShaderContainerMetal::UniformData } MDShader *shader = nullptr; - if (is_compute) { + if (pipeline_type == PIPELINE_TYPE_COMPUTE) { MDComputeShader *cs = new MDComputeShader( shader_name, uniform_sets, @@ -2272,6 +2273,62 @@ static void update_uniform_info(const RenderingShaderContainerMetal::UniformData return PipelineID(pipeline); } +#pragma mark - Raytracing + +// ----- ACCELERATION STRUCTURE ----- + +RDD::AccelerationStructureID RenderingDeviceDriverMetal::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_coun, BitField p_geometry_bits) { + ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the Metal driver."); +} + +uint32_t RenderingDeviceDriverMetal::tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) { + ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the Metal driver."); +} + +void RenderingDeviceDriverMetal::tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + +RDD::AccelerationStructureID RenderingDeviceDriverMetal::tlas_create(BufferID p_instance_buffer) { + ERR_FAIL_V_MSG(AccelerationStructureID(), "Ray tracing is not currently supported by the Metal driver."); +} + +void RenderingDeviceDriverMetal::acceleration_structure_free(RDD::AccelerationStructureID p_acceleration_structure) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + +uint32_t 
RenderingDeviceDriverMetal::acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) { + ERR_FAIL_V_MSG(0, "Ray tracing is not currently supported by the Metal driver."); +} + +// ----- PIPELINE ----- + +RDD::RaytracingPipelineID RenderingDeviceDriverMetal::raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) { + ERR_FAIL_V_MSG(RaytracingPipelineID(), "Ray tracing is not currently supported by the Metal driver."); +} + +void RenderingDeviceDriverMetal::raytracing_pipeline_free(RDD::RaytracingPipelineID p_pipeline) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + +// ----- COMMANDS ----- + +void RenderingDeviceDriverMetal::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + +void RenderingDeviceDriverMetal::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + +void RenderingDeviceDriverMetal::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + +void RenderingDeviceDriverMetal::command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) { + ERR_FAIL_MSG("Ray tracing is not currently supported by the Metal driver."); +} + #pragma mark - Queries // ----- TIMESTAMP ----- diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index d819caf003a2..b5296809822e 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -48,6 +48,13 @@ #define 
ARRAY_SIZE(a) std_size(a) +// Disable raytracing support on macOS and iOS due to MoltenVK limitations. +#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED)) +#define VULKAN_RAYTRACING_ENABLED 1 +#else +#define VULKAN_RAYTRACING_ENABLED 0 +#endif + #define PRINT_NATIVE_COMMANDS 0 // Enable the use of re-spirv for optimizing shaders after applying specialization constants. @@ -408,6 +415,21 @@ uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_fla if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) { flags += SHADER_STAGE_COMPUTE_BIT; } + if (supported_stages & VK_SHADER_STAGE_RAYGEN_BIT_KHR) { + flags += SHADER_STAGE_RAYGEN_BIT; + } + if (supported_stages & VK_SHADER_STAGE_ANY_HIT_BIT_KHR) { + flags += SHADER_STAGE_ANY_HIT_BIT; + } + if (supported_stages & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) { + flags += SHADER_STAGE_CLOSEST_HIT_BIT; + } + if (supported_stages & VK_SHADER_STAGE_MISS_BIT_KHR) { + flags += SHADER_STAGE_MISS_BIT; + } + if (supported_stages & VK_SHADER_STAGE_INTERSECTION_BIT_KHR) { + flags += SHADER_STAGE_INTERSECTION_BIT; + } return flags; } @@ -558,6 +580,11 @@ Error RenderingDeviceDriverVulkan::_initialize_device_extensions() { _register_requested_device_extension(VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME, false); _register_requested_device_extension(VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME, false); _register_requested_device_extension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, false); + _register_requested_device_extension(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME, false); + _register_requested_device_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, false); // We don't actually use this extension, but some runtime components on some 
platforms // can and will fill the validation layers with useless info otherwise if not enabled. @@ -762,6 +789,10 @@ Error RenderingDeviceDriverVulkan::_check_device_features() { return OK; } +static uint32_t _align_up(uint32_t size, uint32_t alignment) { + return (size + (alignment - 1)) & ~(alignment - 1); +} + Error RenderingDeviceDriverVulkan::_check_device_capabilities() { // Fill device family and version. device_capabilities.device_family = DEVICE_VULKAN; @@ -795,6 +826,11 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {}; VkPhysicalDeviceMultiviewFeatures multiview_features = {}; VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {}; + VkPhysicalDeviceVulkanMemoryModelFeatures memory_model_features = {}; + VkPhysicalDeviceAccelerationStructureFeaturesKHR acceleration_structure_features = {}; + VkPhysicalDeviceRayTracingPipelineFeaturesKHR raytracing_pipeline_features = {}; + VkPhysicalDeviceSynchronization2FeaturesKHR sync_2_features = {}; + VkPhysicalDeviceRayTracingValidationFeaturesNV raytracing_validation_features = {}; const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2; if (use_1_2_features) { @@ -855,6 +891,36 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_features = &pipeline_cache_control_features; } + if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) { + memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES; + memory_model_features.pNext = next_features; + next_features = &memory_model_features; + } + + if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; + acceleration_structure_features.pNext = next_features; + next_features = 
&acceleration_structure_features; + } + + if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + raytracing_pipeline_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR; + raytracing_pipeline_features.pNext = next_features; + next_features = &raytracing_pipeline_features; + } + + if (enabled_device_extension_names.has(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME)) { + raytracing_validation_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV; + raytracing_validation_features.pNext = next_features; + next_features = &raytracing_validation_features; + } + + if (enabled_device_extension_names.has(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)) { + sync_2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES; + sync_2_features.pNext = next_features; + next_features = &sync_2_features; + } + VkPhysicalDeviceFeatures2 device_features_2 = {}; device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; device_features_2.pNext = next_features; @@ -934,6 +1000,15 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { device_memory_report_support = true; } #endif + + if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) { + acceleration_structure_capabilities.acceleration_structure_support = acceleration_structure_features.accelerationStructure; + } + + if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) { + raytracing_capabilities.raytracing_pipeline_support = raytracing_pipeline_features.rayTracingPipeline; + raytracing_capabilities.validation = raytracing_validation_features.rayTracingValidation; + } } if (functions.GetPhysicalDeviceProperties2 != nullptr) { @@ -944,6 +1019,8 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { VkPhysicalDeviceMultiviewProperties multiview_properties = {}; VkPhysicalDeviceSubgroupProperties subgroup_properties = {}; 
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {}; + VkPhysicalDeviceAccelerationStructurePropertiesKHR acceleration_structure_properties = {}; + VkPhysicalDeviceRayTracingPipelinePropertiesKHR raytracing_properties = {}; VkPhysicalDeviceProperties2 physical_device_properties_2 = {}; const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1; @@ -984,6 +1061,18 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { next_properties = &fdmo_properties; } + if (acceleration_structure_capabilities.acceleration_structure_support) { + acceleration_structure_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR; + acceleration_structure_properties.pNext = next_properties; + next_properties = &acceleration_structure_properties; + } + + if (raytracing_capabilities.raytracing_pipeline_support) { + raytracing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR; + raytracing_properties.pNext = next_properties; + next_properties = &raytracing_properties; + } + physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; physical_device_properties_2.pNext = next_properties; functions.GetPhysicalDeviceProperties2(physical_device, &physical_device_properties_2); @@ -1084,6 +1173,29 @@ Error RenderingDeviceDriverVulkan::_check_device_capabilities() { if (subgroup_capabilities.quad_operations_in_all_stages) { print_verbose(" quad operations in all stages"); } + + if (acceleration_structure_capabilities.acceleration_structure_support) { + print_verbose("- Vulkan Acceleration Structure supported"); + acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment = acceleration_structure_properties.minAccelerationStructureScratchOffsetAlignment; + print_verbose(" min acceleration structure scratch offset alignment: " + 
itos(acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment)); + } else { + print_verbose("- Vulkan Acceleration Structure not supported"); + } + + if (raytracing_capabilities.raytracing_pipeline_support) { + raytracing_capabilities.shader_group_handle_size = raytracing_properties.shaderGroupHandleSize; + raytracing_capabilities.shader_group_handle_alignment = raytracing_properties.shaderGroupHandleAlignment; + raytracing_capabilities.shader_group_handle_size_aligned = _align_up(raytracing_capabilities.shader_group_handle_size, raytracing_capabilities.shader_group_handle_alignment); + raytracing_capabilities.shader_group_base_alignment = raytracing_properties.shaderGroupBaseAlignment; + + print_verbose("- Vulkan Raytracing supported"); + print_verbose(" shader group handle size: " + itos(raytracing_capabilities.shader_group_handle_size)); + print_verbose(" shader group handle alignment: " + itos(raytracing_capabilities.shader_group_handle_alignment)); + print_verbose(" shader group handle size aligned: " + itos(raytracing_capabilities.shader_group_handle_size_aligned)); + print_verbose(" shader group base alignment: " + itos(raytracing_capabilities.shader_group_base_alignment)); + } else { + print_verbose("- Vulkan Raytracing not supported"); + } } return OK; @@ -1220,6 +1332,30 @@ Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) { uint32_t alignment = 16u; // 16 bytes is reasonable. 
@@ -2640,6 +2787,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELI static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)); static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT, VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR)); // RDD::BarrierAccessBits == VkAccessFlagBits. static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT)); @@ -2659,6 +2808,8 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_ static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_WRITE_BIT)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR)); static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR)); void RenderingDeviceDriverVulkan::command_pipeline_barrier( CommandBufferID p_cmd_buffer, @@ -2666,13 +2817,14 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, - VectorView 
p_texture_barriers) { + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) { VkMemoryBarrier *vk_memory_barriers = ALLOCA_ARRAY(VkMemoryBarrier, p_memory_barriers.size()); for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { vk_memory_barriers[i] = {}; vk_memory_barriers[i].sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access); - vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access); + vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; + vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; } VkBufferMemoryBarrier *vk_buffer_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_buffer_barriers.size()); @@ -2681,8 +2833,8 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( vk_buffer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; vk_buffer_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; vk_buffer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access); - vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access); + vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; + vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; vk_buffer_barriers[i].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer; vk_buffer_barriers[i].offset = p_buffer_barriers[i].offset; vk_buffer_barriers[i].size = p_buffer_barriers[i].size; @@ -2707,8 +2859,43 @@ void 
RenderingDeviceDriverVulkan::command_pipeline_barrier( vk_image_barriers[i].subresourceRange.layerCount = p_texture_barriers[i].subresources.layer_count; } + VkPipelineStageFlags src_stage_flags = _rd_to_vk_pipeline_stages(p_src_stages); + VkPipelineStageFlags dst_stage_flags = _rd_to_vk_pipeline_stages(p_dst_stages); + VkPipelineStageFlags accel_src_stages = src_stage_flags; + VkPipelineStageFlags accel_dst_stages = dst_stage_flags; + + VkBufferMemoryBarrier *vk_accel_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_acceleration_structure_barriers.size()); + for (uint32_t i = 0; i < p_acceleration_structure_barriers.size(); i++) { + // If the rayQuery feature is not enabled and a memory barrier srcAccessMask includes + // VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, srcStageMask must not include any of the + // VK_PIPELINE_STAGE_*_SHADER_BIT stages except VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR + VkAccessFlags src_access = _rd_to_vk_access_flags(p_acceleration_structure_barriers[i].src_access); + if ((src_access & VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) != 0) { + accel_src_stages &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } + + // If the rayQuery feature is not enabled and a memory barrier dstAccessMask includes + // VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, dstStageMask must not include any of the + // VK_PIPELINE_STAGE_*_SHADER_BIT stages except VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR + VkAccessFlags dst_access = _rd_to_vk_access_flags(p_acceleration_structure_barriers[i].dst_access); + if ((dst_access & VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) != 0) { + accel_dst_stages &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT); + } + + const AccelerationStructureInfo *accel_info = (const AccelerationStructureInfo *)p_acceleration_structure_barriers[i].acceleration_structure.id; + vk_accel_barriers[i] = {}; + vk_accel_barriers[i].sType = 
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + vk_accel_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_accel_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vk_accel_barriers[i].srcAccessMask = src_access; + vk_accel_barriers[i].dstAccessMask = dst_access; + vk_accel_barriers[i].buffer = ((const BufferInfo *)accel_info->buffer.id)->vk_buffer; + vk_accel_barriers[i].offset = p_acceleration_structure_barriers[i].offset; + vk_accel_barriers[i].size = p_acceleration_structure_barriers[i].size; + } + #if PRINT_NATIVE_COMMANDS - print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size())); + print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d ACCELERATION STRUCTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size(), p_acceleration_structure_barriers.size())); for (uint32_t i = 0; i < p_memory_barriers.size(); i++) { print_line(vformat(" VkMemoryBarrier #%d src 0x%uX dst 0x%uX", i, vk_memory_barriers[i].srcAccessMask, vk_memory_barriers[i].dstAccessMask)); } @@ -2722,17 +2909,32 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier( uint64_t(vk_image_barriers[i].image), vk_image_barriers[i].oldLayout, vk_image_barriers[i].newLayout, vk_image_barriers[i].subresourceRange.baseMipLevel, vk_image_barriers[i].subresourceRange.levelCount, vk_image_barriers[i].subresourceRange.baseArrayLayer, vk_image_barriers[i].subresourceRange.layerCount)); } + + for (uint32_t i = 0; i < p_acceleration_structure_barriers.size(); i++) { + print_line(vformat(" VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX acceleration structure buffer 0x%ux", i, vk_accel_barriers[i].srcAccessMask, vk_accel_barriers[i].dstAccessMask, uint64_t(vk_accel_barriers[i].buffer))); + } #endif const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; vkCmdPipelineBarrier( command_buffer->vk_command_buffer, - 
_rd_to_vk_pipeline_stages(p_src_stages), - _rd_to_vk_pipeline_stages(p_dst_stages), + src_stage_flags, + dst_stage_flags, 0, p_memory_barriers.size(), vk_memory_barriers, p_buffer_barriers.size(), vk_buffer_barriers, p_texture_barriers.size(), vk_image_barriers); + + if (p_acceleration_structure_barriers.size() > 0) { + vkCmdPipelineBarrier( + command_buffer->vk_command_buffer, + accel_src_stages, + accel_dst_stages, + 0, + 0, nullptr, + p_acceleration_structure_barriers.size(), vk_accel_barriers, + 0, nullptr); + } } /****************/ @@ -3792,6 +3994,11 @@ static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_ VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, VK_SHADER_STAGE_COMPUTE_BIT, + VK_SHADER_STAGE_RAYGEN_BIT_KHR, + VK_SHADER_STAGE_ANY_HIT_BIT_KHR, + VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR, + VK_SHADER_STAGE_MISS_BIT_KHR, + VK_SHADER_STAGE_INTERSECTION_BIT_KHR, }; RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { @@ -3872,6 +4079,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Re case UNIFORM_TYPE_INPUT_ATTACHMENT: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; } break; + case UNIFORM_TYPE_ACCELERATION_STRUCTURE: { + layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + } break; default: { DEV_ASSERT(false); } @@ -3898,6 +4108,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Re shader_info.respv_stage_shaders.reserve(stage_count); } + // AnyHit and ClosestHit go in the same group. 
+ uint32_t hit_group_index = UINT32_MAX; + for (int i = 0; i < stage_count; i++) { const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i]; bool requires_decompression = (shader.code_decompressed_size > 0); @@ -3977,6 +4190,52 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Re create_info.module = vk_module; create_info.pName = "main"; shader_info.vk_stages_create_info.push_back(create_info); + + ShaderStage stage = shader_refl.stages_vector[i]; + + if (stage == ShaderStage::SHADER_STAGE_RAYGEN || stage == ShaderStage::SHADER_STAGE_MISS) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = VK_SHADER_UNUSED_KHR; + group_info.intersectionShader = VK_SHADER_UNUSED_KHR; + group_info.generalShader = i; + + shader_info.vk_groups_create_info.push_back(group_info); + } + if (stage == ShaderStage::SHADER_STAGE_ANY_HIT || stage == ShaderStage::SHADER_STAGE_CLOSEST_HIT) { + if (hit_group_index == UINT32_MAX) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = VK_SHADER_UNUSED_KHR; + group_info.intersectionShader = VK_SHADER_UNUSED_KHR; + group_info.generalShader = VK_SHADER_UNUSED_KHR; + + hit_group_index = shader_info.vk_groups_create_info.size(); + shader_info.vk_groups_create_info.push_back(group_info); + } + + VkRayTracingShaderGroupCreateInfoKHR &group_info = shader_info.vk_groups_create_info[hit_group_index]; + if (stage == ShaderStage::SHADER_STAGE_ANY_HIT) { + group_info.anyHitShader = i; + } else if (stage == 
ShaderStage::SHADER_STAGE_CLOSEST_HIT) { + group_info.closestHitShader = i; + } + } + if (stage == ShaderStage::SHADER_STAGE_INTERSECTION) { + VkRayTracingShaderGroupCreateInfoKHR group_info = {}; + group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR; + group_info.anyHitShader = VK_SHADER_UNUSED_KHR; + group_info.closestHitShader = VK_SHADER_UNUSED_KHR; + group_info.intersectionShader = i; + group_info.generalShader = VK_SHADER_UNUSED_KHR; + + shader_info.vk_groups_create_info.push_back(group_info); + } } // Descriptor sets. @@ -4046,6 +4305,30 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Re ERR_FAIL_V_MSG(ShaderID(), error_text); } + if (shader_refl.pipeline_type == PIPELINE_TYPE_RAYTRACING) { + // Regions + + for (ShaderStage stage : shader_refl.stages_vector) { + switch (stage) { + case ShaderStage::SHADER_STAGE_RAYGEN: + shader_info.region_count.raygen_count += 1; + break; + case ShaderStage::SHADER_STAGE_ANY_HIT: + case ShaderStage::SHADER_STAGE_CLOSEST_HIT: + shader_info.region_count.hit_count += 1; + break; + case ShaderStage::SHADER_STAGE_MISS: + shader_info.region_count.miss_count += 1; + break; + default: + // nothing + break; + } + } + + shader_info.region_count.group_count = shader_info.region_count.raygen_count + shader_info.region_count.hit_count + shader_info.region_count.miss_count; + } + // Bookkeep. 
ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); *shader_info_ptr = shader_info; @@ -4165,6 +4448,13 @@ VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_create(const curr_vk_size++; vk_sizes_count++; } + if (p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE]) { + *curr_vk_size = {}; + curr_vk_size->type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE] * max_descriptor_sets_per_pool; + curr_vk_size++; + vk_sizes_count++; + } DEV_ASSERT(vk_sizes_count <= UNIFORM_TYPE_MAX); } @@ -4414,6 +4704,17 @@ RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorViewsType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + acceleration_structure_write->accelerationStructureCount = 1; + acceleration_structure_write->pAccelerationStructures = &accel_info->vk_acceleration_structure; + + vk_writes[i].descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + vk_writes[i].pNext = acceleration_structure_write; + } break; default: { DEV_ASSERT(false); } @@ -5777,6 +6078,420 @@ RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create( return PipelineID(vk_pipeline); } +/********************/ +/**** RAYTRACING ****/ +/********************/ + +// RDD::AccelerationStructureGeometryBits == VkGeometryFlagsKHR. 
+static_assert(ENUM_MEMBERS_EQUAL(RDD::ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE, VK_GEOMETRY_OPAQUE_BIT_KHR)); +static_assert(ENUM_MEMBERS_EQUAL(RDD::ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION, VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR)); + +RDD::AccelerationStructureID RenderingDeviceDriverVulkan::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BitField p_geometry_bits) { +#if VULKAN_RAYTRACING_ENABLED + const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id; + + const VkVertexInputAttributeDescription *position_attribute = nullptr; + for (const VkVertexInputAttributeDescription &attribute : vf_info->vk_attributes) { + if (attribute.location == p_position_attribute_location) { + position_attribute = &attribute; + break; + } + } + ERR_FAIL_NULL_V_MSG(position_attribute, AccelerationStructureID(), "BLAS position attribute location is missing from the vertex format."); + + uint32_t position_binding_index = position_attribute->binding; + if (position_binding_index == UINT32_MAX) { + position_binding_index = p_position_attribute_location; + } + + const VkVertexInputBindingDescription *position_binding = nullptr; + for (const VkVertexInputBindingDescription &binding : vf_info->vk_bindings) { + if (binding.binding == position_binding_index) { + position_binding = &binding; + break; + } + } + ERR_FAIL_NULL_V_MSG(position_binding, AccelerationStructureID(), "BLAS position attribute binding is missing from the vertex format."); + + VkDeviceSize buffer_offset = position_attribute->offset; + + VkDeviceAddress vertex_address = buffer_get_device_address(p_vertex_buffer) + buffer_offset; + VkDeviceAddress index_address = buffer_get_device_address(p_index_buffer) + p_index_offset_bytes; + + VkDeviceSize 
vertex_stride = position_binding->stride; + VkFormat vertex_format = position_attribute->format; + uint32_t max_vertex = p_vertex_count ? p_vertex_count - 1 : 0; + + AccelerationStructureInfo *accel_info = VersatileResource::allocate(resources_allocator); + + accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR; + accel_info->geometry.flags = p_geometry_bits; + + accel_info->geometry.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR; + accel_info->geometry.geometry.triangles.vertexFormat = vertex_format; + accel_info->geometry.geometry.triangles.vertexData.deviceAddress = vertex_address; + accel_info->geometry.geometry.triangles.vertexStride = vertex_stride; + accel_info->geometry.geometry.triangles.indexType = p_index_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + accel_info->geometry.geometry.triangles.indexData.deviceAddress = index_address; + accel_info->geometry.geometry.triangles.transformData.deviceAddress = 0; + // Number of vertices in vertexData minus one, aka max vertex index. + accel_info->geometry.geometry.triangles.maxVertex = max_vertex; + + // Info for building BLAS. + uint32_t primitive_count = p_vertex_count / 3; + if (p_index_buffer) { + primitive_count = p_index_count / 3; + } + // The vertex offset is expressed in bytes. 
+ uint32_t first_vertex = p_vertex_offset / vertex_stride; + accel_info->range_info.firstVertex = first_vertex; + accel_info->range_info.primitiveCount = primitive_count; + accel_info->range_info.primitiveOffset = 0; + accel_info->range_info.transformOffset = 0; + uint32_t max_primitive_count = accel_info->range_info.primitiveCount; + + accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accel_info->build_info.pGeometries = &accel_info->geometry; + accel_info->build_info.geometryCount = 1; + accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + + VkAccelerationStructureBuildSizesInfoKHR size_info = {}; + size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + + vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &max_primitive_count, &size_info); + _acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, size_info, accel_info); + + return AccelerationStructureID(accel_info); +#else + return AccelerationStructureID(); +#endif +} + +#if VULKAN_RAYTRACING_ENABLED +static _FORCE_INLINE_ void _store_transform_transposed_3x4(const Transform3D &p_mtx, VkTransformMatrixKHR &r_mtx) { + r_mtx.matrix[0][0] = p_mtx.basis.rows[0][0]; + r_mtx.matrix[0][1] = p_mtx.basis.rows[0][1]; + r_mtx.matrix[0][2] = p_mtx.basis.rows[0][2]; + r_mtx.matrix[0][3] = p_mtx.origin.x; + r_mtx.matrix[1][0] = p_mtx.basis.rows[1][0]; + r_mtx.matrix[1][1] = p_mtx.basis.rows[1][1]; + r_mtx.matrix[1][2] = p_mtx.basis.rows[1][2]; + r_mtx.matrix[1][3] = p_mtx.origin.y; + r_mtx.matrix[2][0] = p_mtx.basis.rows[2][0]; + r_mtx.matrix[2][1] = p_mtx.basis.rows[2][1]; + r_mtx.matrix[2][2] = p_mtx.basis.rows[2][2]; + r_mtx.matrix[2][3] = 
p_mtx.origin.z; +} +#endif + +uint32_t RenderingDeviceDriverVulkan::tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) { +#if VULKAN_RAYTRACING_ENABLED + return p_instance_count * sizeof(VkAccelerationStructureInstanceKHR); +#else + return 0; +#endif +} + +void RenderingDeviceDriverVulkan::tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) { +#if VULKAN_RAYTRACING_ENABLED + uint32_t blases_count = p_blases.size(); + ERR_FAIL_COND_MSG(blases_count != p_transforms.size(), "Blases and transforms vectors must have the same size."); + ERR_FAIL_COND(blases_count == 0); + + LocalVector instances; + instances.resize(blases_count); + + for (uint32_t i = 0; i < blases_count; ++i) { + const AccelerationStructureID &blas = p_blases[i]; + AccelerationStructureInfo *blas_info = (AccelerationStructureInfo *)blas.id; + + VkAccelerationStructureInstanceKHR &instance = instances[i]; + _store_transform_transposed_3x4(p_transforms[i], instance.transform); + instance.instanceCustomIndex = i; + instance.mask = 0xFF; + instance.accelerationStructureReference = buffer_get_device_address(blas_info->buffer); + instance.instanceShaderBindingTableRecordOffset = 0; + instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + } + + uint8_t *data_ptr = buffer_map(p_instances_buffer); + ERR_FAIL_NULL(data_ptr); + uint32_t instances_size = blases_count * sizeof(instances[0]); + memcpy(data_ptr, instances.ptr(), instances_size); + buffer_unmap(p_instances_buffer); +#endif +} + +RDD::AccelerationStructureID RenderingDeviceDriverVulkan::tlas_create(BufferID p_instances_buffer) { +#if VULKAN_RAYTRACING_ENABLED + ERR_FAIL_COND_V(p_instances_buffer == BufferID(), AccelerationStructureID()); + + AccelerationStructureInfo *accel_info = VersatileResource::allocate(resources_allocator); + + accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR; + accel_info->geometry.geometryType = 
VK_GEOMETRY_TYPE_INSTANCES_KHR; + accel_info->geometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; + accel_info->geometry.geometry.instances.data.deviceAddress = buffer_get_device_address(p_instances_buffer); + + accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + accel_info->build_info.geometryCount = 1; + accel_info->build_info.pGeometries = &accel_info->geometry; + accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR; + + uint32_t instance_count = buffer_get_allocation_size(p_instances_buffer) / sizeof(VkAccelerationStructureInstanceKHR); + VkAccelerationStructureBuildSizesInfoKHR size_info = {}; + size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR; + vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &instance_count, &size_info); + accel_info->range_info.primitiveCount = instance_count; + + _acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, size_info, accel_info); + return AccelerationStructureID(accel_info); +#else + return AccelerationStructureID(); +#endif +} + +#if VULKAN_RAYTRACING_ENABLED +static VkDeviceAddress _align_up_address(VkDeviceAddress address, VkDeviceAddress alignment) { + return (address + (alignment - 1)) & ~(alignment - 1); +} +#endif + +void RenderingDeviceDriverVulkan::_acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR p_size_info, AccelerationStructureInfo *r_accel_info) { +#if VULKAN_RAYTRACING_ENABLED + RDD::BufferID buffer = buffer_create(p_size_info.accelerationStructureSize, RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT | RDD::BUFFER_USAGE_STORAGE_BIT 
| RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU, UINT64_MAX); + r_accel_info->buffer = buffer; + + // Scratch address must be a multiple of minAccelerationStructureScratchOffsetAlignment. + r_accel_info->scratch_alignment = acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment; + r_accel_info->scratch_size = p_size_info.buildScratchSize + r_accel_info->scratch_alignment; + + VkAccelerationStructureCreateInfoKHR accel_create_info = {}; + accel_create_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR; + accel_create_info.type = p_type; + accel_create_info.size = p_size_info.accelerationStructureSize; + accel_create_info.buffer = ((const BufferInfo *)buffer.id)->vk_buffer; + VkResult err = vkCreateAccelerationStructureKHR(vk_device, &accel_create_info, nullptr, &r_accel_info->vk_acceleration_structure); + ERR_FAIL_COND_MSG(err, "vkCreateAccelerationStructureKHR failed with error " + itos(err) + "."); + r_accel_info->build_info.dstAccelerationStructure = r_accel_info->vk_acceleration_structure; +#endif +} + +void RenderingDeviceDriverVulkan::acceleration_structure_free(AccelerationStructureID p_acceleration_structure) { +#if VULKAN_RAYTRACING_ENABLED + AccelerationStructureInfo *accel_info = (AccelerationStructureInfo *)p_acceleration_structure.id; + ERR_FAIL_NULL_MSG(accel_info, "Acceleration structure input parameter is not valid."); + if (accel_info->instances_buffer) { + buffer_free(accel_info->instances_buffer); + } + if (accel_info->buffer) { + buffer_free(accel_info->buffer); + } + if (accel_info->vk_acceleration_structure) { + vkDestroyAccelerationStructureKHR(vk_device, accel_info->vk_acceleration_structure, nullptr); + } + VersatileResource::free(resources_allocator, accel_info); +#endif +} + +uint32_t RenderingDeviceDriverVulkan::acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) { + AccelerationStructureInfo *accel_info = 
(AccelerationStructureInfo *)p_acceleration_structure.id;
+	ERR_FAIL_NULL_V_MSG(accel_info, 0, "Acceleration structure input parameter is not valid.");
+	return accel_info->scratch_size;
+}
+
+// ----- COMMANDS -----
+
+// Records the build of `p_acceleration_structure` into `p_cmd_buffer`.
+// `p_scratch_buffer` provides the scratch memory; its device address is passed through
+// `_align_up_address()` with the alignment stored in the acceleration structure info.
+void RenderingDeviceDriverVulkan::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) {
+#if VULKAN_RAYTRACING_ENABLED
+	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
+	AccelerationStructureInfo *accel_info = (AccelerationStructureInfo *)p_acceleration_structure.id;
+	// Same validation (and message) as the scratch size query above.
+	ERR_FAIL_NULL_MSG(accel_info, "Acceleration structure input parameter is not valid.");
+
+	VkAccelerationStructureBuildGeometryInfoKHR *build_info = &accel_info->build_info;
+	VkDeviceAddress scratch_address = buffer_get_device_address(p_scratch_buffer);
+	build_info->scratchData.deviceAddress = _align_up_address(scratch_address, accel_info->scratch_alignment);
+
+	const VkAccelerationStructureBuildRangeInfoKHR *range_info_ptr = &accel_info->range_info;
+
+	vkCmdBuildAccelerationStructuresKHR(command_buffer->vk_command_buffer, 1, build_info, &range_info_ptr);
+#endif
+}
+
+// Binds `p_pipeline` to the ray tracing bind point and remembers it, since `command_trace_rays()`
+// reads the shader binding table regions from the bound pipeline.
+void RenderingDeviceDriverVulkan::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) {
+	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
+	bound_raytracing_pipeline_id = p_pipeline;
+	const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)p_pipeline.id;
+	vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rpi->vk_pipeline);
+}
+
+// Binds the descriptor set of `p_uniform_set` at `p_set_index`, using the pipeline layout of `p_shader`.
+void RenderingDeviceDriverVulkan::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
+	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
+	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
+	const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id;
+	vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr);
+}
+
+// Dispatches a `p_width` x `p_height` x 1 ray tracing workload with the regions of the bound pipeline.
+void RenderingDeviceDriverVulkan::command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) {
+#if VULKAN_RAYTRACING_ENABLED
+	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
+	ERR_FAIL_COND_MSG(bound_raytracing_pipeline_id == RaytracingPipelineID(), "A raytracing pipeline must have been bound with `command_bind_raytracing_pipeline()`.");
+	const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)bound_raytracing_pipeline_id.id;
+	vkCmdTraceRaysKHR(command_buffer->vk_command_buffer, &rpi->regions.raygen, &rpi->regions.miss, &rpi->regions.hit, &rpi->regions.call, p_width, p_height, 1);
+#endif
+}
+
+// --- PIPELINE ---
+
+// Creates a ray tracing pipeline from the stages and groups stored in `p_shader`, then builds its
+// shader binding table. Returns an empty ID on failure (or when ray tracing is compiled out);
+// everything allocated up to the point of failure is released before returning.
+RDD::RaytracingPipelineID RenderingDeviceDriverVulkan::raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) {
+#if VULKAN_RAYTRACING_ENABLED
+	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
+
+	VkRayTracingPipelineCreateInfoKHR pipeline_create_info = {};
+	pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR;
+
+	// Specialization constants.
+	// Every stage receives the same constants, so the map is built once and shared between the
+	// stages instead of being re-allocated on the stack for each of them.
+	VkSpecializationInfo *specialization_info = nullptr;
+	if (p_specialization_constants.size()) {
+		VkSpecializationMapEntry *specialization_map_entries = ALLOCA_ARRAY(VkSpecializationMapEntry, p_specialization_constants.size());
+		for (uint32_t j = 0; j < p_specialization_constants.size(); j++) {
+			specialization_map_entries[j] = {};
+			specialization_map_entries[j].constantID = p_specialization_constants[j].constant_id;
+			// Byte offset of this constant's value inside the constants array that is passed as `pData`.
+			specialization_map_entries[j].offset = (const char *)&p_specialization_constants[j].int_value - (const char *)p_specialization_constants.ptr();
+			specialization_map_entries[j].size = sizeof(uint32_t);
+		}
+
+		specialization_info = ALLOCA_SINGLE(VkSpecializationInfo);
+		*specialization_info = {};
+		specialization_info->dataSize = p_specialization_constants.size() * sizeof(PipelineSpecializationConstant);
+		specialization_info->pData = p_specialization_constants.ptr();
+		specialization_info->mapEntryCount = p_specialization_constants.size();
+		specialization_info->pMapEntries = specialization_map_entries;
+	}
+
+	// Stages.
+	pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size();
+
+	VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, pipeline_create_info.stageCount);
+
+	for (uint32_t i = 0; i < pipeline_create_info.stageCount; i++) {
+		vk_pipeline_stages[i] = shader_info->vk_stages_create_info[i];
+		if (specialization_info) {
+			vk_pipeline_stages[i].pSpecializationInfo = specialization_info;
+		}
+	}
+
+	// Groups.
+	// The count comes from the vector that is actually read, so a shader whose group count differs
+	// from its stage count can neither over-read nor truncate it.
+	pipeline_create_info.groupCount = shader_info->vk_groups_create_info.size();
+	VkRayTracingShaderGroupCreateInfoKHR *vk_pipeline_groups = ALLOCA_ARRAY(VkRayTracingShaderGroupCreateInfoKHR, pipeline_create_info.groupCount);
+	for (uint32_t i = 0; i < pipeline_create_info.groupCount; i++) {
+		vk_pipeline_groups[i] = shader_info->vk_groups_create_info[i];
+	}
+
+	// Pipeline.
+	pipeline_create_info.layout = shader_info->vk_pipeline_layout;
+	pipeline_create_info.pStages = vk_pipeline_stages;
+	pipeline_create_info.pGroups = vk_pipeline_groups;
+	pipeline_create_info.maxPipelineRayRecursionDepth = 1;
+
+	RaytracingPipelineInfo *rpi = VersatileResource::allocate(resources_allocator);
+
+	VkResult err = vkCreateRayTracingPipelinesKHR(vk_device, VK_NULL_HANDLE, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &rpi->vk_pipeline);
+	if (err != VK_SUCCESS) {
+		// No pipeline was created; only the info allocation has to be returned.
+		VersatileResource::free(resources_allocator, rpi);
+		ERR_FAIL_V_MSG(RaytracingPipelineID(), "vkCreateRayTracingPipelinesKHR failed with error " + itos(err) + ".");
+	}
+
+	RaytracingPipelineID raytracing_pipeline = RaytracingPipelineID(rpi);
+	err = _raytracing_pipeline_stb_create(raytracing_pipeline, p_shader);
+	if (err != VK_SUCCESS) {
+		// Destroys the pipeline, the shader binding table buffer (if it was created) and the info.
+		raytracing_pipeline_free(raytracing_pipeline);
+		ERR_FAIL_V_MSG(RaytracingPipelineID(), "_raytracing_pipeline_stb_create failed with error " + itos(err) + ".");
+	}
+
+	return raytracing_pipeline;
+#else
+	return RaytracingPipelineID();
+#endif
+}
+
+// Builds the shader binding table (SBT) of `p_pipeline`: computes the raygen/hit/miss regions,
+// creates the backing buffer and copies the shader group handles of the pipeline into it.
+// Returns VK_SUCCESS on success. On failure, a buffer already stored in `rpi->sbt_buffer` is not
+// released here; `raytracing_pipeline_free()` releases it.
+VkResult RenderingDeviceDriverVulkan::_raytracing_pipeline_stb_create(RaytracingPipelineID p_pipeline, ShaderID p_shader) {
+#if VULKAN_RAYTRACING_ENABLED
+	RaytracingPipelineInfo *rpi = (RaytracingPipelineInfo *)p_pipeline.id;
+	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
+
+	// Shader group handles.
+	uint32_t handle_size_aligned = raytracing_capabilities.shader_group_handle_size_aligned;
+	uint32_t base_alignment = raytracing_capabilities.shader_group_base_alignment;
+
+	// vkCmdTraceRaysKHR requires the size of the raygen region to be equal to its stride.
+	rpi->regions.raygen.stride = _align_up(handle_size_aligned * shader_info->region_count.raygen_count, base_alignment);
+	rpi->regions.raygen.size = rpi->regions.raygen.stride;
+
+	rpi->regions.hit.stride = handle_size_aligned;
+	rpi->regions.hit.size = _align_up(handle_size_aligned * shader_info->region_count.hit_count, base_alignment);
+
+	rpi->regions.miss.stride = handle_size_aligned;
+	rpi->regions.miss.size = _align_up(handle_size_aligned * shader_info->region_count.miss_count, base_alignment);
+
+	// Callable shaders are not used.
+	rpi->regions.call.stride = 0;
+	rpi->regions.call.size = 0;
+
+	// Shader binding table.
+	uint32_t sbt_size = rpi->regions.raygen.size + rpi->regions.hit.size + rpi->regions.miss.size + rpi->regions.call.size;
+	rpi->sbt_buffer = buffer_create(sbt_size, BUFFER_USAGE_TRANSFER_FROM_BIT | BUFFER_USAGE_DEVICE_ADDRESS_BIT | BUFFER_USAGE_SHADER_BINDING_TABLE_BIT, MEMORY_ALLOCATION_TYPE_CPU, UINT64_MAX);
+	ERR_FAIL_COND_V_MSG(!rpi->sbt_buffer, VK_ERROR_INITIALIZATION_FAILED, "Failed to create the shader binding table buffer.");
+
+	// Update regions addresses. The regions are laid out back to back: raygen, hit, miss.
+	// NOTE(review): the region addresses are presumably expected to honor `shader_group_base_alignment`;
+	// confirm that the address returned for the buffer is aligned accordingly.
+	rpi->regions.raygen.deviceAddress = buffer_get_device_address(rpi->sbt_buffer);
+	rpi->regions.hit.deviceAddress = rpi->regions.raygen.deviceAddress + rpi->regions.raygen.size;
+	rpi->regions.miss.deviceAddress = rpi->regions.hit.deviceAddress + rpi->regions.hit.size;
+	rpi->regions.call.deviceAddress = 0;
+
+	// Update shader binding table buffer.
+	uint32_t handle_size = raytracing_capabilities.shader_group_handle_size;
+	uint32_t handles_size = shader_info->region_count.group_count * handle_size;
+	LocalVector handles_data;
+	handles_data.resize(handles_size);
+	uint8_t *handles_ptr = handles_data.ptr();
+
+	VkResult err = vkGetRayTracingShaderGroupHandlesKHR(vk_device, rpi->vk_pipeline, 0, shader_info->region_count.group_count, handles_size, handles_ptr);
+	ERR_FAIL_COND_V_MSG(err, err, "vkGetRayTracingShaderGroupHandlesKHR failed with error " + itos(err) + ".");
+
+	uint8_t *sbt_ptr = buffer_map(rpi->sbt_buffer);
+	ERR_FAIL_NULL_V_MSG(sbt_ptr, VK_ERROR_MEMORY_MAP_FAILED, "Failed to map the shader binding table buffer.");
+	uint8_t *sbt_data = sbt_ptr;
+	// Handles are consumed in the order raygen, hit, miss.
+	// NOTE(review): assumes the shader groups were created in that same order; confirm against shader creation.
+	uint32_t handle_index = 0;
+
+	// Raygen.
+	// NOTE(review): a single handle is copied although the region is sized from `raygen_count`;
+	// presumably a shader has exactly one raygen group. Confirm.
+	memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size);
+	++handle_index;
+
+	// Hit.
+	sbt_data = sbt_ptr + rpi->regions.raygen.size;
+	for (uint32_t i = 0; i < shader_info->region_count.hit_count; ++i) {
+		memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size);
+		sbt_data += rpi->regions.hit.stride;
+		++handle_index;
+	}
+
+	// Miss.
+	sbt_data = sbt_ptr + rpi->regions.raygen.size + rpi->regions.hit.size;
+	for (uint32_t i = 0; i < shader_info->region_count.miss_count; ++i) {
+		memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size);
+		sbt_data += rpi->regions.miss.stride;
+		++handle_index;
+	}
+
+	buffer_unmap(rpi->sbt_buffer);
+
+	return err;
+#else
+	return VK_ERROR_UNKNOWN;
+#endif
+}
+
+// Destroys the Vulkan pipeline, frees the shader binding table buffer (if any) and releases the info.
+void RenderingDeviceDriverVulkan::raytracing_pipeline_free(RaytracingPipelineID p_pipeline) {
+	// `command_trace_rays()` dereferences the bound pipeline, so it must not keep pointing at freed memory.
+	if (bound_raytracing_pipeline_id == p_pipeline) {
+		bound_raytracing_pipeline_id = RaytracingPipelineID();
+	}
+
+	const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)p_pipeline.id;
+	vkDestroyPipeline(vk_device, rpi->vk_pipeline, nullptr);
+	if (rpi->sbt_buffer) {
+		buffer_free(rpi->sbt_buffer);
+	}
+	VersatileResource::free(resources_allocator, rpi);
+}
+
 /*****************/
 /**** COMPUTE ****/
 /*****************/
@@ -6314,6 +7029,14 @@ void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver
 		case OBJECT_TYPE_PIPELINE: {
 			_set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name);
 		} break;
+		case OBJECT_TYPE_ACCELERATION_STRUCTURE: {
+			const AccelerationStructureInfo *asi = (const AccelerationStructureInfo *)p_driver_id.id;
+			_set_object_name(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)asi->vk_acceleration_structure, p_name);
+		} break;
+		case OBJECT_TYPE_RAYTRACING_PIPELINE: {
+			const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)p_driver_id.id;
+			_set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)rpi->vk_pipeline, p_name);
+		} break;
 		default: {
 			DEV_ASSERT(false);
 		}
@@ -6506,6 +7229,10 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) {
 			return framebuffer_depth_resolve;
 		case SUPPORTS_POINT_SIZE:
 			return true;
+		case SUPPORTS_RAY_QUERY:
+			return
acceleration_structure_capabilities.acceleration_structure_support && ray_query_support;
+		case SUPPORTS_RAYTRACING_PIPELINE:
+			return acceleration_structure_capabilities.acceleration_structure_support && raytracing_capabilities.raytracing_pipeline_support;
 		default:
 			return false;
 	}
diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h
index c53ac31b5b6d..9f620faf6f60 100644
--- a/drivers/vulkan/rendering_device_driver_vulkan.h
+++ b/drivers/vulkan/rendering_device_driver_vulkan.h
@@ -92,6 +92,20 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 		bool storage_input_output_16 = false;
 	};
 
+	struct AccelerationStructureCapabilities {
+		bool acceleration_structure_support = false;
+		uint32_t min_acceleration_structure_scratch_offset_alignment = 0;
+	};
+
+	struct RaytracingCapabilities {
+		bool raytracing_pipeline_support = false;
+		uint32_t shader_group_handle_size = 0;
+		uint32_t shader_group_handle_alignment = 0;
+		uint32_t shader_group_handle_size_aligned = 0;
+		uint32_t shader_group_base_alignment = 0;
+		bool validation = false;
+	};
+
 	struct DeviceFunctions {
 		PFN_vkCreateSwapchainKHR CreateSwapchainKHR = nullptr;
 		PFN_vkDestroySwapchainKHR DestroySwapchainKHR = nullptr;
@@ -109,6 +123,10 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 
 		// Debug device fault.
 		PFN_vkGetDeviceFaultInfoEXT GetDeviceFaultInfoEXT = nullptr;
+
+		// Raytracing extensions.
+		PFN_vkCreateAccelerationStructureKHR CreateAccelerationStructureKHR = nullptr;
+		PFN_vkCreateRayTracingPipelinesKHR CreateRaytracingPipelinesKHR = nullptr;
 	};
 	// Debug marker extensions.
 	VkDebugReportObjectTypeEXT _convert_to_debug_report_objectType(VkObjectType p_object_type);
@@ -136,6 +154,9 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 	bool buffer_device_address_support = false;
 	bool vulkan_memory_model_support = false;
 	bool vulkan_memory_model_device_scope_support = false;
+	AccelerationStructureCapabilities acceleration_structure_capabilities;
+	bool ray_query_support = false;
+	RaytracingCapabilities raytracing_capabilities;
 	bool pipeline_cache_control_support = false;
 	bool device_fault_support = false;
 	bool framebuffer_depth_resolve = false;
@@ -303,7 +324,8 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 			BitField p_dst_stages,
 			VectorView p_memory_barriers,
 			VectorView p_buffer_barriers,
-			VectorView p_texture_barriers) override final;
+			VectorView p_texture_barriers,
+			VectorView p_acceleration_structure_barriers) override final;
 
 	/****************/
 	/**** FENCES ****/
@@ -444,15 +466,25 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 	/**** SHADER ****/
 	/****************/
 private:
+	struct RaytracingShaderRegionCount {
+		uint32_t raygen_count = 0;
+		uint32_t hit_count = 0;
+		uint32_t miss_count = 0;
+		uint32_t group_count = 0;
+	};
+
 	struct ShaderInfo {
 		String name;
 		VkShaderStageFlags vk_push_constant_stages = 0;
 		TightLocalVector vk_stages_create_info;
+		TightLocalVector vk_groups_create_info;
 		TightLocalVector vk_descriptor_set_layouts;
 		TightLocalVector respv_stage_shaders;
 		TightLocalVector> spirv_stage_bytes;
 		TightLocalVector original_stage_size;
 		VkPipelineLayout vk_pipeline_layout = VK_NULL_HANDLE;
+		// Used to update the shader binding table buffer.
+		RaytracingShaderRegionCount region_count;
 	};
 
 public:
@@ -659,6 +691,76 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver {
 
 	virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final;
 
+	/********************/
+	/**** RAYTRACING ****/
+	/********************/
+
+	// ----- ACCELERATION STRUCTURE -----
+
+	struct AccelerationStructureInfo {
+		VkAccelerationStructureKHR vk_acceleration_structure = VK_NULL_HANDLE;
+		// Buffer used for the structure.
+		RDD::BufferID buffer;
+
+		// Alignment of the scratch buffer for building the structure.
+		uint32_t scratch_alignment = 0;
+		// Size of the scratch buffer for building the structure.
+		uint32_t scratch_size = 0;
+
+		// Buffer used for instances in a TLAS.
+		RDD::BufferID instances_buffer;
+
+		// Required for building. Zero-initialized so no field is ever read with an indeterminate value.
+		VkAccelerationStructureGeometryKHR geometry = {};
+		LocalVector instances;
+		VkAccelerationStructureBuildGeometryInfoKHR build_info = {};
+		VkAccelerationStructureBuildRangeInfoKHR range_info = {};
+	};
+
+	virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BitField p_geometry_bits) override final;
+	virtual uint32_t tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) override final;
+	virtual void tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) override final;
+	virtual AccelerationStructureID tlas_create(BufferID p_instances_buffer) override final;
+	virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) override final;
+	virtual uint32_t acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) override final;
+
+private:
+	void _acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR p_size_info, AccelerationStructureInfo *r_accel_info);
+
+public:
+	// ----- COMMANDS -----
+
+	virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) override final;
+	virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) override final;
+	virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) override final;
+	virtual void command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) override final;
+
+private:
+	RaytracingPipelineID bound_raytracing_pipeline_id;
+
+	// ----- PIPELINE -----
+
+	struct RaytracingShaderRegions {
+		VkStridedDeviceAddressRegionKHR raygen = {};
+		VkStridedDeviceAddressRegionKHR hit = {};
+		VkStridedDeviceAddressRegionKHR miss = {};
+		VkStridedDeviceAddressRegionKHR call = {};
+	};
+
+	struct RaytracingPipelineInfo {
+		VkPipeline vk_pipeline = VK_NULL_HANDLE;
+		ShaderID shader;
+		// Regions passed to vkCmdTraceRaysKHR.
+		RaytracingShaderRegions regions;
+		// Shader binding table.
+		BufferID sbt_buffer;
+	};
+
+public:
+	virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) override final;
+	VkResult _raytracing_pipeline_stb_create(RaytracingPipelineID p_pipeline, ShaderID p_shader);
+	virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) override final;
+
 	/*****************/
 	/**** QUERIES ****/
 	/*****************/
diff --git a/drivers/vulkan/rendering_shader_container_vulkan.cpp b/drivers/vulkan/rendering_shader_container_vulkan.cpp
index 1b4dcb495915..0c0220587e66 100644
--- a/drivers/vulkan/rendering_shader_container_vulkan.cpp
+++ b/drivers/vulkan/rendering_shader_container_vulkan.cpp
@@ -101,7 +101,7 @@ RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatVulk
 }
 
 RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatVulkan::get_shader_spirv_version() const {
-	return SHADER_SPIRV_VERSION_1_3;
+	return SHADER_SPIRV_VERSION_1_4;
 }
 
 void RenderingShaderContainerFormatVulkan::set_debug_info_enabled(bool p_debug_info_enabled) {
diff --git a/editor/shader/shader_file_editor_plugin.cpp b/editor/shader/shader_file_editor_plugin.cpp
index 81d310a6f308..ae56858a6a07 100644
--- a/editor/shader/shader_file_editor_plugin.cpp
+++ b/editor/shader/shader_file_editor_plugin.cpp
@@ -274,7 +274,12 @@ ShaderFileEditor::ShaderFileEditor() {
 		"Fragment",
 		"TessControl",
 		"TessEval",
-		"Compute"
+		"Compute",
+		"Raygen",
+		"AnyHit",
+		"ClosestHit",
+		"Miss",
+		"Intersection",
 	};
 
 	stage_hb = memnew(HFlowContainer);
diff --git a/gles3_builders.py b/gles3_builders.py
index 24f7320af2cd..a2c4d2266149 100644
--- a/gles3_builders.py
+++ b/gles3_builders.py
@@ -9,6 +9,11 @@ class GLES3HeaderStruct:
     def __init__(self):
         self.vertex_lines = []
         self.fragment_lines = []
+        self.raygen_lines = []
+        self.any_hit_lines = []
+        self.closest_hit_lines = []
+        self.miss_lines = []
+        self.intersection_lines = []
         self.uniforms = []
         self.fbos = []
         self.texunits = []
@@
-24,6 +29,11 @@ def __init__(self): self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 + self.raygen_offset = 0 + self.any_hit_offset = 0 + self.closest_hit_offset = 0 + self.miss_offset = 0 + self.intersection_offset = 0 self.variant_defines = [] self.variant_names = [] self.specialization_names = [] @@ -87,6 +97,41 @@ def include_file_in_gles3_header(filename: str, header_data: GLES3HeaderStruct, header_data.fragment_offset = header_data.line_offset continue + if line.find("#[raygen]") != -1: + header_data.reading = "raygen" + line = fs.readline() + header_data.line_offset += 1 + header_data.raygen_offset = header_data.line_offset + continue + + if line.find("#[any_hit]") != -1: + header_data.reading = "any_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.any_hit_offset = header_data.line_offset + continue + + if line.find("#[closest_hit]") != -1: + header_data.reading = "closest_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.closest_hit_offset = header_data.line_offset + continue + + if line.find("#[miss]") != -1: + header_data.reading = "miss" + line = fs.readline() + header_data.line_offset += 1 + header_data.miss_offset = header_data.line_offset + continue + + if line.find("#[intersection]") != -1: + header_data.reading = "intersection" + line = fs.readline() + header_data.line_offset += 1 + header_data.intersection_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -181,6 +226,16 @@ def include_file_in_gles3_header(filename: str, header_data: GLES3HeaderStruct, header_data.vertex_lines += [line] if header_data.reading == "fragment": header_data.fragment_lines += [line] + if header_data.reading == "raygen": + header_data.raygen_lines += [line] + if header_data.reading == "any_hit": + header_data.any_hit_lines += [line] + if header_data.reading == "closest_hit": + header_data.closest_hit_lines += [line] 
+ if header_data.reading == "miss": + header_data.miss_lines += [line] + if header_data.reading == "intersection": + header_data.intersection_lines += [line] line = fs.readline() header_data.line_offset += 1 diff --git a/glsl_builders.py b/glsl_builders.py index dbe9d5adf1fd..ca2e5435d80d 100644 --- a/glsl_builders.py +++ b/glsl_builders.py @@ -10,16 +10,31 @@ def __init__(self): self.vertex_lines = [] self.fragment_lines = [] self.compute_lines = [] + self.raygen_lines = [] + self.any_hit_lines = [] + self.closest_hit_lines = [] + self.miss_lines = [] + self.intersection_lines = [] self.vertex_included_files = [] self.fragment_included_files = [] self.compute_included_files = [] + self.raygen_included_files = [] + self.any_hit_included_files = [] + self.closest_hit_included_files = [] + self.miss_included_files = [] + self.intersection_included_files = [] self.reading = "" self.line_offset = 0 self.vertex_offset = 0 self.fragment_offset = 0 self.compute_offset = 0 + self.raygen_offset = 0 + self.any_hit_offset = 0 + self.closest_hit_offset = 0 + self.miss_offset = 0 + self.intersection_offset = 0 def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: int) -> RDHeaderStruct: @@ -52,6 +67,41 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.compute_offset = header_data.line_offset continue + if line.find("#[raygen]") != -1: + header_data.reading = "raygen" + line = fs.readline() + header_data.line_offset += 1 + header_data.raygen_offset = header_data.line_offset + continue + + if line.find("#[any_hit]") != -1: + header_data.reading = "any_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.any_hit_offset = header_data.line_offset + continue + + if line.find("#[closest_hit]") != -1: + header_data.reading = "closest_hit" + line = fs.readline() + header_data.line_offset += 1 + header_data.closest_hit_offset = header_data.line_offset + continue + + if line.find("#[miss]") != -1: 
+ header_data.reading = "miss" + line = fs.readline() + header_data.line_offset += 1 + header_data.miss_offset = header_data.line_offset + continue + + if line.find("#[intersection]") != -1: + header_data.reading = "intersection" + line = fs.readline() + header_data.line_offset += 1 + header_data.intersection_offset = header_data.line_offset + continue + while line.find("#include ") != -1: includeline = line.replace("#include ", "").strip()[1:-1] @@ -73,6 +123,31 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.compute_included_files += [included_file] if include_file_in_rd_header(included_file, header_data, depth + 1) is None: print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.raygen_included_files and header_data.reading == "raygen": + header_data.raygen_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.any_hit_included_files and header_data.reading == "any_hit": + header_data.any_hit_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif ( + included_file not in header_data.closest_hit_included_files and header_data.reading == "closest_hit" + ): + header_data.closest_hit_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') + elif included_file not in header_data.miss_included_files and header_data.reading == "miss": + header_data.miss_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In 
file "{filename}": #include "{includeline}" could not be found!"') + elif ( + included_file not in header_data.intersection_included_files + and header_data.reading == "intersection" + ): + header_data.intersection_included_files += [included_file] + if include_file_in_rd_header(included_file, header_data, depth + 1) is None: + print_error(f'In file "{filename}": #include "{includeline}" could not be found!"') line = fs.readline() @@ -84,6 +159,16 @@ def include_file_in_rd_header(filename: str, header_data: RDHeaderStruct, depth: header_data.fragment_lines += [line] if header_data.reading == "compute": header_data.compute_lines += [line] + if header_data.reading == "raygen": + header_data.raygen_lines += [line] + if header_data.reading == "any_hit": + header_data.any_hit_lines += [line] + if header_data.reading == "closest_hit": + header_data.closest_hit_lines += [line] + if header_data.reading == "miss": + header_data.miss_lines += [line] + if header_data.reading == "intersection": + header_data.intersection_lines += [line] line = fs.readline() header_data.line_offset += 1 @@ -104,13 +189,33 @@ class {class_name} : public ShaderRD {{ {class_name}() {{ """) - if header_data.compute_lines: + if header_data.raygen_lines: + file.write(f"""\ + static const char _raygen_code[] = {{ +{to_raw_cstring(header_data.raygen_lines)} + }}; + static const char _any_hit_code[] = {{ +{to_raw_cstring(header_data.any_hit_lines)} + }}; + static const char _closest_hit_code[] = {{ +{to_raw_cstring(header_data.closest_hit_lines)} + }}; + static const char _miss_code[] = {{ +{to_raw_cstring(header_data.miss_lines)} + }}; + static const char _intersection_code[] = {{ +{to_raw_cstring(header_data.intersection_lines)} + }}; + setup_raytracing(_raygen_code, _any_hit_code, _closest_hit_code, _miss_code, _intersection_code, "{class_name}"); +""") + elif header_data.compute_lines: file.write(f"""\ static const char *_vertex_code = nullptr; static const char *_fragment_code = nullptr; static 
const char _compute_code[] = {{ {to_raw_cstring(header_data.compute_lines)} }}; + setup(_vertex_code, _fragment_code, _compute_code, "{class_name}"); """) else: file.write(f"""\ @@ -121,12 +226,12 @@ class {class_name} : public ShaderRD {{ {to_raw_cstring(header_data.fragment_lines)} }}; static const char *_compute_code = nullptr; + setup(_vertex_code, _fragment_code, _compute_code, "{class_name}"); """) - file.write(f"""\ - setup(_vertex_code, _fragment_code, _compute_code, "{class_name}"); - }} -}}; + file.write("""\ + } +}; """) diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index 2fd5b7ccb2aa..952c4805b5c2 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -49,7 +49,12 @@ Vector compile_glslang_shader(RenderingDeviceCommons::ShaderStage p_sta EShLangFragment, EShLangTessControl, EShLangTessEvaluation, - EShLangCompute + EShLangCompute, + EShLangRayGen, + EShLangAnyHit, + EShLangClosestHit, + EShLangMiss, + EShLangIntersect, }; int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100 diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 1429a7d21d2d..ce96244705b1 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -65,6 +65,21 @@ void ShaderRD::_add_stage(const char *p_code, StageType p_stage_type) { case STAGE_TYPE_COMPUTE: chunk.type = StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS; break; + case STAGE_TYPE_RAYGEN: + chunk.type = StageTemplate::Chunk::TYPE_RAYGEN_GLOBALS; + break; + case STAGE_TYPE_ANY_HIT: + chunk.type = StageTemplate::Chunk::TYPE_ANY_HIT_GLOBALS; + break; + case STAGE_TYPE_CLOSEST_HIT: + chunk.type = StageTemplate::Chunk::TYPE_CLOSEST_HIT_GLOBALS; + break; + case STAGE_TYPE_MISS: + chunk.type = StageTemplate::Chunk::TYPE_MISS_GLOBALS; + break; + case STAGE_TYPE_INTERSECTION: + chunk.type = StageTemplate::Chunk::TYPE_INTERSECTION_GLOBALS; + 
break; default: { } } @@ -136,9 +151,9 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con if (p_compute_code) { _add_stage(p_compute_code, STAGE_TYPE_COMPUTE); - is_compute = true; + pipeline_type = RD::PIPELINE_TYPE_COMPUTE; } else { - is_compute = false; + pipeline_type = RD::PIPELINE_TYPE_RASTERIZATION; if (p_vertex_code) { _add_stage(p_vertex_code, STAGE_TYPE_VERTEX); } @@ -164,6 +179,47 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con base_sha256 = tohash.as_string().sha256_text(); } +void ShaderRD::setup_raytracing(const char *p_raygen_code, const char *p_any_hit_code, const char *p_closest_hit_code, const char *p_miss_code, const char *p_intersection_code, const char *p_name) { + name = p_name; + + pipeline_type = RD::PIPELINE_TYPE_RAYTRACING; + if (p_raygen_code) { + _add_stage(p_raygen_code, STAGE_TYPE_RAYGEN); + } + if (p_any_hit_code) { + _add_stage(p_any_hit_code, STAGE_TYPE_ANY_HIT); + } + if (p_closest_hit_code) { + _add_stage(p_closest_hit_code, STAGE_TYPE_CLOSEST_HIT); + } + if (p_miss_code) { + _add_stage(p_miss_code, STAGE_TYPE_MISS); + } + if (p_intersection_code) { + _add_stage(p_intersection_code, STAGE_TYPE_INTERSECTION); + } + + StringBuilder tohash; + tohash.append("[GodotVersionNumber]"); + tohash.append(GODOT_VERSION_NUMBER); + tohash.append("[GodotVersionHash]"); + tohash.append(GODOT_VERSION_HASH); + tohash.append("[Raygen]"); + tohash.append(p_raygen_code ? p_raygen_code : ""); + tohash.append("[AnyHit]"); + tohash.append(p_any_hit_code ? p_any_hit_code : ""); + tohash.append("[ClosestHit]"); + tohash.append(p_closest_hit_code ? p_closest_hit_code : ""); + tohash.append("[Miss]"); + tohash.append(p_miss_code ? p_miss_code : ""); + tohash.append("[Intersection]"); + tohash.append(p_intersection_code ? p_intersection_code : ""); + tohash.append("[DebugInfo]"); + tohash.append(Engine::get_singleton()->is_generate_spirv_debug_info_enabled() ? 
"1" : "0"); + + base_sha256 = tohash.as_string().sha256_text(); +} + RID ShaderRD::version_create(bool p_embedded) { //initialize() was never called ERR_FAIL_COND_V(group_to_variant_map.is_empty(), RID()); @@ -251,6 +307,21 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c case StageTemplate::Chunk::TYPE_COMPUTE_GLOBALS: { builder.append(p_version->compute_globals.get_data()); // compute globals } break; + case StageTemplate::Chunk::TYPE_RAYGEN_GLOBALS: { + builder.append(p_version->raygen_globals.get_data()); // raygen globals + } break; + case StageTemplate::Chunk::TYPE_ANY_HIT_GLOBALS: { + builder.append(p_version->any_hit_globals.get_data()); // any_hit globals + } break; + case StageTemplate::Chunk::TYPE_CLOSEST_HIT_GLOBALS: { + builder.append(p_version->closest_hit_globals.get_data()); // closest_hit globals + } break; + case StageTemplate::Chunk::TYPE_MISS_GLOBALS: { + builder.append(p_version->miss_globals.get_data()); // miss globals + } break; + case StageTemplate::Chunk::TYPE_INTERSECTION_GLOBALS: { + builder.append(p_version->intersection_globals.get_data()); // intersection globals + } break; case StageTemplate::Chunk::TYPE_CODE: { if (p_version->code_sections.has(chunk.code)) { builder.append(p_version->code_sections[chunk.code].get_data()); @@ -271,11 +342,46 @@ Vector ShaderRD::_build_variant_stage_sources(uint32_t p_variant, Compil Vector stage_sources; stage_sources.resize(RD::SHADER_STAGE_MAX); - if (is_compute) { + if (pipeline_type == RD::PIPELINE_TYPE_COMPUTE) { // Compute stage. StringBuilder builder; _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_COMPUTE]); stage_sources.write[RD::SHADER_STAGE_COMPUTE] = builder.as_string(); + } else if (pipeline_type == RD::PIPELINE_TYPE_RAYTRACING) { + { + // Raygen stage. 
+ StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_RAYGEN]); + stage_sources.write[RD::SHADER_STAGE_RAYGEN] = builder.as_string(); + } + + { + // Any hit stage. + StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_ANY_HIT]); + stage_sources.write[RD::SHADER_STAGE_ANY_HIT] = builder.as_string(); + } + + { + // Closest hit stage. + StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_CLOSEST_HIT]); + stage_sources.write[RD::SHADER_STAGE_CLOSEST_HIT] = builder.as_string(); + } + + { + // Miss stage. + StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_MISS]); + stage_sources.write[RD::SHADER_STAGE_MISS] = builder.as_string(); + } + + { + // Intersection stage. + StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_INTERSECTION]); + stage_sources.write[RD::SHADER_STAGE_INTERSECTION] = builder.as_string(); + } } else { { // Vertex stage. @@ -338,8 +444,8 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.resize(variant_defines.size()); for (int i = 0; i < source_code.versions.size(); i++) { - if (!is_compute) { - //vertex stage + if (pipeline_type == RD::PIPELINE_TYPE_RASTERIZATION) { + // Vertex stage. StringBuilder builder; _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_VERTEX]); @@ -351,8 +457,8 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } - if (!is_compute) { - //fragment stage + if (pipeline_type == RD::PIPELINE_TYPE_RASTERIZATION) { + // Fragment stage. 
StringBuilder builder; _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_FRAGMENT]); @@ -364,8 +470,8 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } - if (is_compute) { - //compute stage + if (pipeline_type == RD::PIPELINE_TYPE_COMPUTE) { + // Compute stage. StringBuilder builder; _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_COMPUTE]); @@ -376,6 +482,67 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio source_code.versions.write[i].stages.push_back(stage); } + + if (pipeline_type == RD::PIPELINE_TYPE_RAYTRACING) { + // Raygen stage. + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_RAYGEN]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "raygen"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PIPELINE_TYPE_RAYTRACING) { + // Any hit stage. + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_ANY_HIT]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "any_hit"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PIPELINE_TYPE_RAYTRACING) { + // Closest hit stage. + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_CLOSEST_HIT]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "closest_hit"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PIPELINE_TYPE_RAYTRACING) { + // Miss stage. 
+ + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_MISS]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "miss"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } + if (pipeline_type == RD::PIPELINE_TYPE_RAYTRACING) { + // Intersection stage. + + StringBuilder builder; + _build_variant_code(builder, i, version, stage_templates[STAGE_TYPE_INTERSECTION]); + + RS::ShaderNativeSourceCode::Version::Stage stage; + stage.name = "intersection"; + stage.code = builder.as_string(); + + source_code.versions.write[i].stages.push_back(stage); + } } return source_code; @@ -399,6 +566,16 @@ String ShaderRD::_version_get_sha1(Version *p_version) const { hash_build.append(p_version->fragment_globals.get_data()); hash_build.append("[compute_globals]"); hash_build.append(p_version->compute_globals.get_data()); + hash_build.append("[raygen_globals]"); + hash_build.append(p_version->raygen_globals.get_data()); + hash_build.append("[any_hit_globals]"); + hash_build.append(p_version->any_hit_globals.get_data()); + hash_build.append("[closest_hit_globals]"); + hash_build.append(p_version->closest_hit_globals.get_data()); + hash_build.append("[miss_globals]"); + hash_build.append(p_version->miss_globals.get_data()); + hash_build.append("[intersection_globals]"); + hash_build.append(p_version->intersection_globals.get_data()); Vector code_sections; for (const KeyValue &E : p_version->code_sections) { @@ -608,8 +785,33 @@ void ShaderRD::_compile_ensure_finished(Version *p_version) { } } +void ShaderRD::_version_set(Version *p_version, const HashMap &p_code, const Vector &p_custom_defines) { + p_version->code_sections.clear(); + for (const KeyValue &E : p_code) { + p_version->code_sections[StringName(E.key.to_upper())] = E.value.utf8(); + } + + p_version->custom_defines.clear(); + for (const String &custom_define : p_custom_defines) { + 
p_version->custom_defines.push_back(custom_define.utf8()); + } + + p_version->dirty = true; + if (p_version->initialize_needed) { + _initialize_version(p_version); + for (int i = 0; i < group_enabled.size(); i++) { + if (!group_enabled[i]) { + _allocate_placeholders(p_version, i); + continue; + } + _compile_version_start(p_version, i); + } + p_version->initialize_needed = false; + } +} + void ShaderRD::version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines) { - ERR_FAIL_COND(is_compute); + ERR_FAIL_COND(pipeline_type != RD::PIPELINE_TYPE_RASTERIZATION); Version *version = version_owner.get_or_null(p_version); ERR_FAIL_NULL(version); @@ -621,32 +823,12 @@ void ShaderRD::version_set_code(RID p_version, const HashMap &p_ version->vertex_globals = p_vertex_globals.utf8(); version->fragment_globals = p_fragment_globals.utf8(); version->uniforms = p_uniforms.utf8(); - version->code_sections.clear(); - for (const KeyValue &E : p_code) { - version->code_sections[StringName(E.key.to_upper())] = E.value.utf8(); - } - version->custom_defines.clear(); - for (int i = 0; i < p_custom_defines.size(); i++) { - version->custom_defines.push_back(p_custom_defines[i].utf8()); - } - - version->dirty = true; - if (version->initialize_needed) { - _initialize_version(version); - for (int i = 0; i < group_enabled.size(); i++) { - if (!group_enabled[i]) { - _allocate_placeholders(version, i); - continue; - } - _compile_version_start(version, i); - } - version->initialize_needed = false; - } + _version_set(version, p_code, p_custom_defines); } void ShaderRD::version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines) { - ERR_FAIL_COND(!is_compute); + ERR_FAIL_COND(pipeline_type != RD::PIPELINE_TYPE_COMPUTE); Version *version = version_owner.get_or_null(p_version); 
ERR_FAIL_NULL(version); @@ -658,28 +840,23 @@ void ShaderRD::version_set_compute_code(RID p_version, const HashMapcompute_globals = p_compute_globals.utf8(); version->uniforms = p_uniforms.utf8(); - version->code_sections.clear(); - for (const KeyValue &E : p_code) { - version->code_sections[StringName(E.key.to_upper())] = E.value.utf8(); - } + _version_set(version, p_code, p_custom_defines); +} - version->custom_defines.clear(); - for (int i = 0; i < p_custom_defines.size(); i++) { - version->custom_defines.push_back(p_custom_defines[i].utf8()); - } +void ShaderRD::version_set_raytracing_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_raygen_globals, const String &p_any_hit_globals, const String &p_closest_hit_globals, const String &p_miss_globals, const String &p_intersection_globals, const Vector &p_custom_defines) { + ERR_FAIL_COND(pipeline_type != RD::PIPELINE_TYPE_RAYTRACING); - version->dirty = true; - if (version->initialize_needed) { - _initialize_version(version); - for (int i = 0; i < group_enabled.size(); i++) { - if (!group_enabled[i]) { - _allocate_placeholders(version, i); - continue; - } - _compile_version_start(version, i); - } - version->initialize_needed = false; - } + Version *version = version_owner.get_or_null(p_version); + ERR_FAIL_NULL(version); + + version->raygen_globals = p_raygen_globals.utf8(); + version->any_hit_globals = p_any_hit_globals.utf8(); + version->closest_hit_globals = p_closest_hit_globals.utf8(); + version->miss_globals = p_miss_globals.utf8(); + version->intersection_globals = p_intersection_globals.utf8(); + version->uniforms = p_uniforms.utf8(); + + _version_set(version, p_code, p_custom_defines); } bool ShaderRD::version_is_valid(RID p_version) { diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index e5bd4e5bb303..63d321aa3d8a 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ 
-73,6 +73,11 @@ class ShaderRD { CharString vertex_globals; CharString compute_globals; CharString fragment_globals; + CharString raygen_globals; + CharString any_hit_globals; + CharString closest_hit_globals; + CharString miss_globals; + CharString intersection_globals; HashMap code_sections; Vector custom_defines; Vector group_compilation_tasks; @@ -113,6 +118,11 @@ class ShaderRD { TYPE_VERTEX_GLOBALS, TYPE_FRAGMENT_GLOBALS, TYPE_COMPUTE_GLOBALS, + TYPE_RAYGEN_GLOBALS, + TYPE_ANY_HIT_GLOBALS, + TYPE_CLOSEST_HIT_GLOBALS, + TYPE_MISS_GLOBALS, + TYPE_INTERSECTION_GLOBALS, TYPE_CODE, TYPE_TEXT }; @@ -124,7 +134,7 @@ class ShaderRD { LocalVector chunks; }; - bool is_compute = false; + RD::PipelineType pipeline_type = RD::PIPELINE_TYPE_RASTERIZATION; String name; @@ -149,6 +159,11 @@ class ShaderRD { STAGE_TYPE_VERTEX, STAGE_TYPE_FRAGMENT, STAGE_TYPE_COMPUTE, + STAGE_TYPE_RAYGEN, + STAGE_TYPE_ANY_HIT, + STAGE_TYPE_CLOSEST_HIT, + STAGE_TYPE_MISS, + STAGE_TYPE_INTERSECTION, STAGE_TYPE_MAX, }; @@ -165,16 +180,19 @@ class ShaderRD { bool _load_from_cache(Version *p_version, int p_group); void _save_to_cache(Version *p_version, int p_group); void _initialize_cache(); + void _version_set(Version *p_version, const HashMap &p_code, const Vector &p_custom_defines); protected: ShaderRD(); void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); + void setup_raytracing(const char *p_raygen_code, const char *p_any_hit_code, const char *p_closest_hit_code, const char *p_miss_code, const char *p_intersection_code, const char *p_name); public: RID version_create(bool p_embedded = true); void version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines); void version_set_compute_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines); 
+ void version_set_raytracing_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_raygen_globals, const String &p_any_hit_globals, const String &p_closest_hit_globals, const String &p_miss_globals, const String &p_intersection_globals, const Vector &p_custom_defines); _FORCE_INLINE_ RID version_get_shader(RID p_version, int p_variant) { ERR_FAIL_INDEX_V(p_variant, variant_defines.size(), RID()); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 72cfafc93be6..005933c9e749 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -216,6 +216,238 @@ RID RenderingDevice::shader_create_from_spirv(const Vector return shader_create_from_bytecode(bytecode); } +/********************************/ +/**** ACCELERATION STRUCTURE ****/ +/********************************/ + +RID RenderingDevice::blas_create(RID p_vertex_array, RID p_index_array, BitField p_geometry_bits, uint32_t p_position_attribute_location) { + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE) && !has_feature(SUPPORTS_RAY_QUERY), RID(), "The current rendering device has neither raytracing pipeline nor ray query support."); + + VertexArray *vertex_array = vertex_array_owner.get_or_null(p_vertex_array); + ERR_FAIL_NULL_V(vertex_array, RID()); + + uint32_t position_binding = p_position_attribute_location; + RDD::VertexFormatID vertex_format; + + if (vertex_array->description != INVALID_ID) { + ERR_FAIL_COND_V(!vertex_formats.has(vertex_array->description), RID()); + const VertexDescriptionCache &vd_cache = vertex_formats[vertex_array->description]; + vertex_format = vd_cache.driver_id; + + const VertexAttribute *position_attribute = nullptr; + for (int i = 0; i < vd_cache.vertex_formats.size(); i++) { + const VertexAttribute &attr = vd_cache.vertex_formats[i]; + if (attr.location == p_position_attribute_location) { + position_attribute = &attr; + break; + } + } + 
ERR_FAIL_NULL_V_MSG(position_attribute, RID(), vformat("Vertex array is missing a position attribute at location %u.", p_position_attribute_location)); + ERR_FAIL_COND_V_MSG(position_attribute->frequency != VERTEX_FREQUENCY_VERTEX, RID(), vformat("Position attribute at location %u must use vertex frequency.", p_position_attribute_location)); + + if (position_attribute->binding != UINT32_MAX) { + position_binding = position_attribute->binding; + } + } + + ERR_FAIL_COND_V_MSG(position_binding >= (uint32_t)vertex_array->buffers.size(), RID(), vformat("Vertex array is missing a buffer for binding %u.", position_binding)); + RDD::BufferID vertex_buffer = vertex_array->buffers[position_binding]; + uint64_t vertex_offset = vertex_array->offsets[position_binding]; + + // Indices are optional. + IndexArray *index_array = index_array_owner.get_or_null(p_index_array); + RDD::BufferID index_buffer = RDD::BufferID(); + IndexBufferFormat index_format = IndexBufferFormat::INDEX_BUFFER_FORMAT_UINT32; + uint32_t index_offset_bytes = 0; + uint32_t index_count = 0; + if (index_array) { + index_buffer = index_array->driver_id; + index_format = index_array->format; + index_offset_bytes = index_array->offset * (index_array->format == INDEX_BUFFER_FORMAT_UINT16 ? 
sizeof(uint16_t) : sizeof(uint32_t)); + index_count = index_array->indices; + } + + AccelerationStructure acceleration_structure; + acceleration_structure.type = RDD::ACCELERATION_STRUCTURE_TYPE_BLAS; + + BitField geometry_bits = 0; + if (p_geometry_bits.has_flag(ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE)) { + geometry_bits.set_flag(RDD::ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE); + } + if (p_geometry_bits.has_flag(ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION)) { + geometry_bits.set_flag(RDD::ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION); + } + + acceleration_structure.driver_id = driver->blas_create(vertex_buffer, vertex_offset, vertex_format, vertex_array->vertex_count, p_position_attribute_location, index_buffer, index_format, index_offset_bytes, index_count, geometry_bits); + ERR_FAIL_COND_V_MSG(!acceleration_structure.driver_id, RID(), "Failed to create BLAS."); + acceleration_structure.vertex_array = p_vertex_array; + acceleration_structure.index_array = p_index_array; + + acceleration_structure.draw_tracker = RDG::resource_tracker_create(); + acceleration_structure.draw_tracker->acceleration_structure_driver_id = acceleration_structure.driver_id; + // Assume we are going to build this acceleration structure + acceleration_structure.draw_tracker->usage = RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE; + + for (int i = 0; i < vertex_array->draw_trackers.size(); i++) { + acceleration_structure.draw_trackers.push_back(vertex_array->draw_trackers[i]); + } + _check_transfer_worker_vertex_array(vertex_array); + + if (index_array && index_array->draw_tracker) { + acceleration_structure.draw_trackers.push_back(index_array->draw_tracker); + } + _check_transfer_worker_index_array(index_array); + + RID id = acceleration_structure_owner.make_rid(acceleration_structure); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + +BitField RenderingDevice::_creation_to_usage_bits(BitField 
p_creation_bits) { + BitField usage = 0; + + if (p_creation_bits.has_flag(BUFFER_CREATION_AS_STORAGE_BIT)) { + usage.set_flag(RDD::BUFFER_USAGE_STORAGE_BIT); + } + + if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_BUFFER_DEVICE_ADDRESS), 0, + "The GPU doesn't support buffer address flag."); +#endif + usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT); + } + + if (p_creation_bits.has_flag(BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT)) { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE) && !has_feature(SUPPORTS_RAY_QUERY), 0, + "The GPU doesn't support acceleration structure build input flag."); +#endif + usage.set_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + } + + return usage; +} + +RID RenderingDevice::tlas_instances_buffer_create(uint32_t p_instance_count, BitField p_creation_bits) { + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE) && !has_feature(SUPPORTS_RAY_QUERY), RID(), "The current rendering device has neither raytracing pipeline nor ray query support."); + ERR_FAIL_COND_V(p_instance_count == 0, RID()); + + uint32_t instances_buffer_size_bytes = driver->tlas_instances_buffer_get_size_bytes(p_instance_count); + + InstancesBuffer instances_buffer; + instances_buffer.instance_count = p_instance_count; + instances_buffer.buffer.size = instances_buffer_size_bytes; + instances_buffer.buffer.usage = _creation_to_usage_bits(p_creation_bits) | RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT | RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT; + instances_buffer.buffer.driver_id = driver->buffer_create(instances_buffer.buffer.size, instances_buffer.buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn); + ERR_FAIL_COND_V_MSG(!instances_buffer.buffer.driver_id, RID(), "Failed to create instances buffer."); + + _THREAD_SAFE_LOCK_ + 
buffer_memory += instances_buffer.buffer.size; + _THREAD_SAFE_UNLOCK_ + + RID id = instances_buffer_owner.make_rid(instances_buffer); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + +void RenderingDevice::tlas_instances_buffer_fill(RID p_instances_buffer, const Vector &p_blases, VectorView p_transforms) { + ERR_FAIL_COND_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE) && !has_feature(SUPPORTS_RAY_QUERY), "The current rendering device has neither raytracing pipeline nor ray query support."); + + InstancesBuffer *instances_buffer = instances_buffer_owner.get_or_null(p_instances_buffer); + ERR_FAIL_NULL_MSG(instances_buffer, "Instances buffer input is not valid."); + + uint32_t blases_count = p_blases.size(); + ERR_FAIL_COND_MSG(blases_count != instances_buffer->instance_count, "The number of blases is not equal to the instance count of the instances buffer."); + ERR_FAIL_COND_MSG(blases_count != p_transforms.size(), "Blases and transforms vectors must have the same size."); + + thread_local LocalVector blases; + blases.resize(blases_count); + + for (uint32_t i = 0; i < blases_count; i++) { + const AccelerationStructure *blas = acceleration_structure_owner.get_or_null(p_blases[i]); + ERR_FAIL_NULL_MSG(blas, "BLAS input is not valid."); + ERR_FAIL_COND_MSG(blas->type != RDD::ACCELERATION_STRUCTURE_TYPE_BLAS, "Acceleration structure input is not a BLAS."); + blases[i] = blas->driver_id; + } + + instances_buffer->blases = p_blases; + + driver->tlas_instances_buffer_fill(instances_buffer->buffer.driver_id, blases, p_transforms); +} + +RID RenderingDevice::tlas_create(RID p_instances_buffer) { + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE) && !has_feature(SUPPORTS_RAY_QUERY), RID(), "The current rendering device has neither raytracing pipeline nor ray query support."); + + const InstancesBuffer *instances_buffer = instances_buffer_owner.get_or_null(p_instances_buffer); + 
ERR_FAIL_NULL_V_MSG(instances_buffer, RID(), "Instances buffer input is not valid."); + + AccelerationStructure acceleration_structure; + acceleration_structure.type = RDD::ACCELERATION_STRUCTURE_TYPE_TLAS; + acceleration_structure.driver_id = driver->tlas_create(instances_buffer->buffer.driver_id); + ERR_FAIL_COND_V_MSG(!acceleration_structure.driver_id, RID(), "Failed to create TLAS."); + acceleration_structure.instances_buffer = p_instances_buffer; + + acceleration_structure.draw_tracker = RDG::resource_tracker_create(); + acceleration_structure.draw_tracker->acceleration_structure_driver_id = acceleration_structure.driver_id; + // Assume we are going to build this acceleration structure + acceleration_structure.draw_tracker->usage = RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE; + + for (Vector::ConstIterator itr = instances_buffer->blases.begin(); itr != instances_buffer->blases.end(); ++itr) { + const AccelerationStructure *blas = acceleration_structure_owner.get_or_null(*itr); + ERR_FAIL_NULL_V_MSG(blas, RID(), "BLAS input is not valid."); + if (blas->draw_tracker) { + acceleration_structure.draw_trackers.push_back(blas->draw_tracker); + } + } + + RID id = acceleration_structure_owner.make_rid(acceleration_structure); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + return id; +} + +Error RenderingDevice::acceleration_structure_build(RID p_acceleration_structure) { + ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); + + ERR_FAIL_COND_V_MSG(draw_list.active, ERR_INVALID_PARAMETER, + "Building acceleration structures is forbidden during creation of a draw list."); + ERR_FAIL_COND_V_MSG(compute_list.active, ERR_INVALID_PARAMETER, + "Building acceleration structures is forbidden during creation of a compute list."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, ERR_INVALID_PARAMETER, + "Building acceleration structures is forbidden during creation of a raytracing list."); + + AccelerationStructure *accel = 
acceleration_structure_owner.get_or_null(p_acceleration_structure); + ERR_FAIL_NULL_V_MSG(accel, ERR_INVALID_PARAMETER, "Acceleration structure argument is not valid."); + + uint64_t scratch_size = driver->acceleration_structure_get_scratch_size_bytes(accel->driver_id); + + const Buffer *scratch_buffer = storage_buffer_owner.get_or_null(accel->scratch_buffer); + if (scratch_buffer && driver->buffer_get_allocation_size(scratch_buffer->driver_id) < scratch_size) { + scratch_buffer = nullptr; + free_rid(accel->scratch_buffer); + accel->scratch_buffer = RID(); + } + if (accel->scratch_buffer == RID()) { + accel->scratch_buffer = storage_buffer_create(scratch_size, { nullptr, 0 }, RDD::BUFFER_USAGE_STORAGE_BIT | RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT); + ERR_FAIL_COND_V(accel->scratch_buffer == RID(), ERR_CANT_CREATE); + } + + if (scratch_buffer == nullptr) { + scratch_buffer = storage_buffer_owner.get_or_null(accel->scratch_buffer); + ERR_FAIL_NULL_V_MSG(scratch_buffer, ERR_CANT_CREATE, "Scratch buffer is not valid."); + } + + draw_graph.add_acceleration_structure_build(accel->driver_id, scratch_buffer->driver_id, accel->draw_tracker, accel->draw_trackers); + + return OK; +} + /***************************/ /**** BUFFER MANAGEMENT ****/ /***************************/ @@ -233,6 +465,8 @@ RenderingDevice::Buffer *RenderingDevice::_get_buffer_from_owner(RID p_buffer) { //buffer = texture_buffer_owner.get_or_null(p_buffer)->buffer; } else if (storage_buffer_owner.owns(p_buffer)) { buffer = storage_buffer_owner.get_or_null(p_buffer); + } else if (instances_buffer_owner.owns(p_buffer)) { + buffer = &instances_buffer_owner.get_or_null(p_buffer)->buffer; } return buffer; } @@ -426,9 +660,11 @@ Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); ERR_FAIL_COND_V_MSG(draw_list.active, ERR_INVALID_PARAMETER, - "Copying buffers is forbidden during creation of a draw list"); + "Copying buffers is forbidden during 
creation of a draw list."); ERR_FAIL_COND_V_MSG(compute_list.active, ERR_INVALID_PARAMETER, - "Copying buffers is forbidden during creation of a compute list"); + "Copying buffers is forbidden during creation of a compute list."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a raytracing list."); Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer); if (!src_buffer) { @@ -469,9 +705,11 @@ Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p copy_bytes_count += p_size; ERR_FAIL_COND_V_MSG(draw_list.active && !p_skip_check, ERR_INVALID_PARAMETER, - "Updating buffers is forbidden during creation of a draw list"); + "Updating buffers is forbidden during creation of a draw list."); ERR_FAIL_COND_V_MSG(compute_list.active && !p_skip_check, ERR_INVALID_PARAMETER, - "Updating buffers is forbidden during creation of a compute list"); + "Updating buffers is forbidden during creation of a compute list."); + ERR_FAIL_COND_V_MSG(raytracing_list.active && !p_skip_check, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a raytracing list."); Buffer *buffer = _get_buffer_from_owner(p_buffer); ERR_FAIL_NULL_V_MSG(buffer, ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type."); @@ -557,9 +795,11 @@ Error RenderingDevice::driver_callback_add(RDD::DriverCallback p_callback, void ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); ERR_FAIL_COND_V_MSG(draw_list.active, ERR_INVALID_PARAMETER, - "Driver callback is forbidden during creation of a draw list"); + "Driver callback is forbidden during creation of a draw list."); ERR_FAIL_COND_V_MSG(compute_list.active, ERR_INVALID_PARAMETER, - "Driver callback is forbidden during creation of a compute list"); + "Driver callback is forbidden during creation of a compute list."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, ERR_INVALID_PARAMETER, + "Driver callback is forbidden during creation of a 
raytracing list."); thread_local LocalVector trackers; thread_local LocalVector usages; @@ -628,11 +868,13 @@ Error RenderingDevice::buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_ ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); ERR_FAIL_COND_V_MSG((p_size % 4) != 0, ERR_INVALID_PARAMETER, - "Size must be a multiple of four"); + "Size must be a multiple of four."); ERR_FAIL_COND_V_MSG(draw_list.active, ERR_INVALID_PARAMETER, - "Updating buffers in is forbidden during creation of a draw list"); + "Updating buffers in is forbidden during creation of a draw list."); ERR_FAIL_COND_V_MSG(compute_list.active, ERR_INVALID_PARAMETER, - "Updating buffers is forbidden during creation of a compute list"); + "Updating buffers is forbidden during creation of a compute list."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, ERR_INVALID_PARAMETER, + "Updating buffers is forbidden during creation of a raytracing list."); Buffer *buffer = _get_buffer_from_owner(p_buffer); if (!buffer) { @@ -840,6 +1082,14 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, Span buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT); } + if (p_creation_bits.has_flag(BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT)) { +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE) && !has_feature(SUPPORTS_RAY_QUERY), RID(), + "The GPU doesn't support acceleration structure build input flag."); +#endif + buffer.usage.set_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + } + buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn); ERR_FAIL_COND_V(!buffer.driver_id, RID()); @@ -1528,7 +1778,7 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons tb.subresources.mipmap_count = texture->mipmaps; tb.subresources.base_layer = p_layer; tb.subresources.layer_count = 1; - 
driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb); + driver->command_pipeline_barrier(transfer_worker->command_buffer, RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, RDD::PIPELINE_STAGE_COPY_BIT, {}, {}, tb, {}); } } @@ -1614,7 +1864,9 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons Error RenderingDevice::texture_update(RID p_texture, uint32_t p_layer, const Vector &p_data) { ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE); - ERR_FAIL_COND_V_MSG(draw_list.active || compute_list.active, ERR_INVALID_PARAMETER, "Updating textures is forbidden during creation of a draw or compute list"); + ERR_FAIL_COND_V_MSG(draw_list.active, ERR_INVALID_PARAMETER, "Updating textures is forbidden during creation of a draw list."); + ERR_FAIL_COND_V_MSG(compute_list.active, ERR_INVALID_PARAMETER, "Updating textures is forbidden during creation of a compute list."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, ERR_INVALID_PARAMETER, "Updating textures is forbidden during creation of a raytracing list."); Texture *texture = texture_owner.get_or_null(p_texture); ERR_FAIL_NULL_V(texture, ERR_INVALID_PARAMETER); @@ -3187,6 +3439,9 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, Span p if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) { buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT); } + if (p_creation_bits.has_flag(BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT)) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + } buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn); ERR_FAIL_COND_V(!buffer.driver_id, RID()); @@ -3389,9 +3644,15 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm #endif index_buffer.size = size_bytes; index_buffer.usage = 
(RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_INDEX_BIT); + if (p_creation_bits.has_flag(BUFFER_CREATION_AS_STORAGE_BIT)) { + index_buffer.usage.set_flag(RDD::BUFFER_USAGE_STORAGE_BIT); + } if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) { index_buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT); } + if (p_creation_bits.has_flag(BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT)) { + index_buffer.usage.set_flag(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + } index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn); ERR_FAIL_COND_V(!index_buffer.driver_id, RID()); @@ -3584,6 +3845,13 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vectorstage_bits.set_flag(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT); break; + case SHADER_STAGE_RAYGEN: + case SHADER_STAGE_ANY_HIT: + case SHADER_STAGE_CLOSEST_HIT: + case SHADER_STAGE_MISS: + case SHADER_STAGE_INTERSECTION: + shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT); + break; default: DEV_ASSERT(false && "Unknown shader stage."); break; @@ -4039,7 +4307,7 @@ RID RenderingDevice::uniform_set_create(const VectorView &p_uniform _check_transfer_worker_buffer(buffer); } break; case UNIFORM_TYPE_INPUT_ATTACHMENT: { - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for compute shader (this is not allowed)."); + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PIPELINE_TYPE_RASTERIZATION, RID(), "InputAttachment (binding: " + itos(uniform.binding) + ") supplied for non-render shader (this is not allowed)."); if (uniform.get_id_count() != (uint32_t)set_uniform.length) { if (set_uniform.length > 1) { @@ -4065,6 +4333,22 @@ RID RenderingDevice::uniform_set_create(const VectorView &p_uniform _check_transfer_worker_texture(texture); } } break; + case 
UNIFORM_TYPE_ACCELERATION_STRUCTURE: { + ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(), + "Acceleration structure supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided)."); + + RID accel_id = uniform.get_id(0); + AccelerationStructure *accel = acceleration_structure_owner.get_or_null(accel_id); + ERR_FAIL_NULL_V_MSG(accel, RID(), "Acceleration Structure supplied (binding: " + itos(uniform.binding) + ") is invalid."); + + if (accel->draw_tracker != nullptr) { + draw_trackers.push_back(accel->draw_tracker); + // Acceleration structure is never going to be writable from raytracing shaders + draw_trackers_usage.push_back(RDG::RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ); + } + + driver_uniform.ids.push_back(accel->driver_id); + } break; default: { } } @@ -4129,7 +4413,8 @@ RID RenderingDevice::render_pipeline_create(RID p_shader, FramebufferFormatID p_ // Needs a shader. Shader *shader = shader_owner.get_or_null(p_shader); ERR_FAIL_NULL_V(shader, RID()); - ERR_FAIL_COND_V_MSG(shader->is_compute, RID(), "Compute shaders can't be used in render pipelines"); + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PIPELINE_TYPE_RASTERIZATION, RID(), + "Only render shaders can be used in render pipelines"); // Validate pre-raster shader. One of stages must be vertex shader or mesh shader (not implemented yet). 
ERR_FAIL_COND_V_MSG(!shader->stage_bits.has_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT), RID(), "Pre-raster shader (vertex shader) is not provided for pipeline creation."); @@ -4322,7 +4607,7 @@ RID RenderingDevice::compute_pipeline_create(RID p_shader, const Vectoris_compute, RID(), + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PIPELINE_TYPE_COMPUTE, RID(), "Non-compute shaders can't be used in compute pipelines"); } @@ -4375,6 +4660,57 @@ bool RenderingDevice::compute_pipeline_is_valid(RID p_pipeline) { return compute_pipeline_owner.owns(p_pipeline); } +RID RenderingDevice::raytracing_pipeline_create(RID p_shader, const Vector &p_specialization_constants) { + _THREAD_SAFE_METHOD_ + + // Needs a shader. + Shader *shader = shader_owner.get_or_null(p_shader); + ERR_FAIL_NULL_V(shader, RID()); + + ERR_FAIL_COND_V_MSG(shader->pipeline_type != PIPELINE_TYPE_RAYTRACING, RID(), + "Only raytracing shaders can be used in raytracing pipelines"); + + for (int i = 0; i < shader->specialization_constants.size(); i++) { + const ShaderSpecializationConstant &sc = shader->specialization_constants[i]; + for (int j = 0; j < p_specialization_constants.size(); j++) { + const PipelineSpecializationConstant &psc = p_specialization_constants[j]; + if (psc.constant_id == sc.constant_id) { + ERR_FAIL_COND_V_MSG(psc.type != sc.type, RID(), "Specialization constant provided for id (" + itos(sc.constant_id) + ") is of the wrong type."); + break; + } + } + } + + RaytracingPipeline pipeline; + pipeline.driver_id = driver->raytracing_pipeline_create(shader->driver_id, p_specialization_constants); + ERR_FAIL_COND_V(!pipeline.driver_id, RID()); + + if (pipeline_cache_enabled) { + update_pipeline_cache(); + } + + pipeline.shader = p_shader; + pipeline.shader_driver_id = shader->driver_id; + pipeline.shader_layout_hash = shader->layout_hash; + pipeline.set_formats = shader->set_formats; + pipeline.push_constant_size = shader->push_constant_size; + + // Create ID to associate with this pipeline. 
+ RID id = raytracing_pipeline_owner.make_rid(pipeline); +#ifdef DEV_ENABLED + set_resource_name(id, "RID:" + itos(id.get_id())); +#endif + // Now add all the dependencies. + _add_dependency(id, p_shader); + return id; +} + +bool RenderingDevice::raytracing_pipeline_is_valid(RID p_pipeline) { + _THREAD_SAFE_METHOD_ + + return raytracing_pipeline_owner.owns(p_pipeline); +} + /****************/ /**** SCREEN ****/ /****************/ @@ -4520,6 +4856,7 @@ RenderingDevice::DrawListID RenderingDevice::draw_list_begin_for_screen(DisplayS ERR_FAIL_COND_V_MSG(draw_list.active, INVALID_ID, "Only one draw list can be active at the same time."); ERR_FAIL_COND_V_MSG(compute_list.active, INVALID_ID, "Only one draw/compute list can be active at the same time."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, INVALID_ID, "Only one draw/raytracing list can be active at the same time."); RenderingContextDriver::SurfaceID surface = context->surface_get_from_window(p_screen); HashMap::ConstIterator sc_it = screen_swap_chains.find(p_screen); @@ -5366,6 +5703,236 @@ void RenderingDevice::draw_list_end() { draw_list_bound_textures.clear(); } +/***************************/ +/**** RAYTRACING LISTS ****/ +/**************************/ + +RenderingDevice::RaytracingListID RenderingDevice::raytracing_list_begin() { + ERR_RENDER_THREAD_GUARD_V(INVALID_ID); + + ERR_FAIL_COND_V_MSG(!has_feature(SUPPORTS_RAYTRACING_PIPELINE), INVALID_ID, "The current rendering device has no raytracing pipeline support."); + + ERR_FAIL_COND_V_MSG(draw_list.active, INVALID_ID, "Only one draw/raytracing list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list.active, INVALID_ID, "Only one compute/raytracing list can be active at the same time."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, INVALID_ID, "Only one raytracing list can be active at the same time."); + + raytracing_list.active = true; + + draw_graph.add_raytracing_list_begin(); + + return ID_TYPE_RAYTRACING_LIST; +} + +void 
RenderingDevice::raytracing_list_bind_raytracing_pipeline(RaytracingListID p_list, RID p_raytracing_pipeline) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_COND(!raytracing_list.active); + + const RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_raytracing_pipeline); + ERR_FAIL_NULL(pipeline); + + if (p_raytracing_pipeline == raytracing_list.state.pipeline) { + return; // Redundant state, return. + } + + raytracing_list.state.pipeline = p_raytracing_pipeline; + raytracing_list.state.pipeline_driver_id = pipeline->driver_id; + + draw_graph.add_raytracing_list_bind_pipeline(pipeline->driver_id); + + if (raytracing_list.state.pipeline_shader != pipeline->shader) { + // Shader changed, so descriptor sets may become incompatible. + + uint32_t pcount = pipeline->set_formats.size(); // Formats count in this pipeline. + raytracing_list.state.set_count = MAX(raytracing_list.state.set_count, pcount); + const uint32_t *pformats = pipeline->set_formats.ptr(); // Pipeline set formats. + + uint32_t first_invalid_set = UINT32_MAX; // All valid by default. 
+ switch (driver->api_trait_get(RDD::API_TRAIT_SHADER_CHANGE_INVALIDATION)) { + case RDD::SHADER_CHANGE_INVALIDATION_ALL_BOUND_UNIFORM_SETS: { + first_invalid_set = 0; + } break; + case RDD::SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE: { + for (uint32_t i = 0; i < pcount; i++) { + if (raytracing_list.state.sets[i].pipeline_expected_format != pformats[i]) { + first_invalid_set = i; + break; + } + } + } break; + case RDD::SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH: { + if (raytracing_list.state.pipeline_shader_layout_hash != pipeline->shader_layout_hash) { + first_invalid_set = 0; + } + } break; + } + + for (uint32_t i = 0; i < pcount; i++) { + raytracing_list.state.sets[i].bound = raytracing_list.state.sets[i].bound && i < first_invalid_set; + raytracing_list.state.sets[i].pipeline_expected_format = pformats[i]; + } + + for (uint32_t i = pcount; i < raytracing_list.state.set_count; i++) { + // Unbind the ones above (not used) if exist. + raytracing_list.state.sets[i].bound = false; + } + + raytracing_list.state.set_count = pcount; // Update set count. + + if (pipeline->push_constant_size) { +#ifdef DEBUG_ENABLED + raytracing_list.validation.pipeline_push_constant_supplied = false; +#endif + } + + raytracing_list.state.pipeline_shader = pipeline->shader; + raytracing_list.state.pipeline_shader_driver_id = pipeline->shader_driver_id; + raytracing_list.state.pipeline_shader_layout_hash = pipeline->shader_layout_hash; + } + +#ifdef DEBUG_ENABLED + // Update raytracing pass pipeline info. 
+ raytracing_list.validation.pipeline_active = true; + raytracing_list.validation.pipeline_push_constant_size = pipeline->push_constant_size; +#endif +} + +void RenderingDevice::raytracing_list_bind_uniform_set(RaytracingListID p_list, RID p_uniform_set, uint32_t p_index) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_COND(!raytracing_list.active); + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(p_index >= driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS) || p_index >= MAX_UNIFORM_SETS, + "Attempting to bind a descriptor set (" + itos(p_index) + ") greater than what the hardware supports (" + itos(driver->limit_get(LIMIT_MAX_BOUND_UNIFORM_SETS)) + ")."); +#endif + + UniformSet *uniform_set = uniform_set_owner.get_or_null(p_uniform_set); + ERR_FAIL_NULL(uniform_set); + + if (p_index > raytracing_list.state.set_count) { + raytracing_list.state.set_count = p_index; + } + + raytracing_list.state.sets[p_index].uniform_set_driver_id = uniform_set->driver_id; // Update set pointer. + raytracing_list.state.sets[p_index].bound = false; // Needs rebind. 
+ raytracing_list.state.sets[p_index].uniform_set_format = uniform_set->format; + raytracing_list.state.sets[p_index].uniform_set = p_uniform_set; +} + +void RenderingDevice::raytracing_list_set_push_constant(RaytracingListID p_list, const void *p_data, uint32_t p_data_size) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_COND(!raytracing_list.active); + + ERR_FAIL_COND_MSG(p_data_size > MAX_PUSH_CONSTANT_SIZE, "Push constants can't be bigger than 128 bytes to maintain compatibility."); + +#ifdef DEBUG_ENABLED + ERR_FAIL_COND_MSG(p_data_size != raytracing_list.validation.pipeline_push_constant_size, + "This raytracing pipeline requires (" + itos(raytracing_list.validation.pipeline_push_constant_size) + ") bytes of push constant data, supplied: (" + itos(p_data_size) + ")"); +#endif + + draw_graph.add_raytracing_list_set_push_constant(raytracing_list.state.pipeline_shader_driver_id, p_data, p_data_size); + + // Store it in the state in case we need to restart the raytracing list. 
+ memcpy(raytracing_list.state.push_constant_data, p_data, p_data_size); + raytracing_list.state.push_constant_size = p_data_size; + +#ifdef DEBUG_ENABLED + raytracing_list.validation.pipeline_push_constant_supplied = true; +#endif +} + +void RenderingDevice::raytracing_list_trace_rays(RaytracingListID p_list, uint32_t p_width, uint32_t p_height) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(p_list != ID_TYPE_RAYTRACING_LIST); + ERR_FAIL_COND(!raytracing_list.active); + +#ifdef DEBUG_ENABLED + ERR_FAIL_NULL_MSG(shader_owner.get_or_null(raytracing_list.state.pipeline_shader), "No shader was set before attempting to trace rays."); + ERR_FAIL_NULL_MSG(raytracing_pipeline_owner.get_or_null(raytracing_list.state.pipeline), "No raytracing pipeline was set before attempting to trace rays."); +#endif + +#ifdef DEBUG_ENABLED + + ERR_FAIL_COND_MSG(!raytracing_list.validation.pipeline_active, "No raytracing pipeline was set before attempting to draw."); + + if (raytracing_list.validation.pipeline_push_constant_size > 0) { + // Using push constants, check that they were supplied. + ERR_FAIL_COND_MSG(!raytracing_list.validation.pipeline_push_constant_supplied, + "The shader in this pipeline requires a push constant to be set before drawing, but it's not present."); + } + +#endif + +#ifdef DEBUG_ENABLED + for (uint32_t i = 0; i < raytracing_list.state.set_count; i++) { + if (raytracing_list.state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. 
+ continue; + } + + if (raytracing_list.state.sets[i].pipeline_expected_format != raytracing_list.state.sets[i].uniform_set_format) { + if (raytracing_list.state.sets[i].uniform_set_format == 0) { + ERR_FAIL_MSG("Uniforms were never supplied for set (" + itos(i) + ") at the time of drawing, which are required by the pipeline."); + } else if (uniform_set_owner.owns(raytracing_list.state.sets[i].uniform_set)) { + UniformSet *us = uniform_set_owner.get_or_null(raytracing_list.state.sets[i].uniform_set); + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + "):\n" + _shader_uniform_debug(us->shader_id, us->shader_set) + "\nare not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(raytracing_list.state.pipeline_shader)); + } else { + ERR_FAIL_MSG("Uniforms supplied for set (" + itos(i) + ", which was just freed) are not the same format as required by the pipeline shader. Pipeline shader requires the following bindings:\n" + _shader_uniform_debug(raytracing_list.state.pipeline_shader)); + } + } + } +#endif + + // Prepare descriptor sets if the API doesn't use pipeline barriers. + if (!driver->api_trait_get(RDD::API_TRAIT_HONORS_PIPELINE_BARRIERS)) { + for (uint32_t i = 0; i < raytracing_list.state.set_count; i++) { + if (raytracing_list.state.sets[i].pipeline_expected_format == 0) { + // Nothing expected by this pipeline. + continue; + } + + draw_graph.add_raytracing_list_uniform_set_prepare_for_use(raytracing_list.state.pipeline_shader_driver_id, raytracing_list.state.sets[i].uniform_set_driver_id, i); + } + } + + // Bind descriptor sets. + for (uint32_t i = 0; i < raytracing_list.state.set_count; i++) { + if (raytracing_list.state.sets[i].pipeline_expected_format == 0) { + continue; // Nothing expected by this pipeline. + } + if (!raytracing_list.state.sets[i].bound) { + // All good, see if this requires re-binding. 
+ draw_graph.add_raytracing_list_bind_uniform_set(raytracing_list.state.pipeline_shader_driver_id, raytracing_list.state.sets[i].uniform_set_driver_id, i); + + UniformSet *uniform_set = uniform_set_owner.get_or_null(raytracing_list.state.sets[i].uniform_set); + _uniform_set_update_shared(uniform_set); + + draw_graph.add_raytracing_list_usages(uniform_set->draw_trackers, uniform_set->draw_trackers_usage); + + raytracing_list.state.sets[i].bound = true; + } + } + + draw_graph.add_raytracing_list_trace_rays(p_width, p_height); + raytracing_list.state.trace_count++; +} + +void RenderingDevice::raytracing_list_end() { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(!raytracing_list.active); + + draw_graph.add_raytracing_list_end(); + + raytracing_list = RaytracingList(); +} + /***********************/ /**** COMPUTE LISTS ****/ /***********************/ @@ -5373,7 +5940,8 @@ void RenderingDevice::draw_list_end() { RenderingDevice::ComputeListID RenderingDevice::compute_list_begin() { ERR_RENDER_THREAD_GUARD_V(INVALID_ID); - ERR_FAIL_COND_V_MSG(compute_list.active, INVALID_ID, "Only one draw/compute list can be active at the same time."); + ERR_FAIL_COND_V_MSG(compute_list.active, INVALID_ID, "Only one compute list can be active at the same time."); + ERR_FAIL_COND_V_MSG(raytracing_list.active, INVALID_ID, "Only one raytracing list can be active at the same time."); compute_list.active = true; @@ -6101,7 +6669,7 @@ void RenderingDevice::_submit_transfer_workers(RDD::CommandBufferID p_draw_comma void RenderingDevice::_submit_transfer_barriers(RDD::CommandBufferID p_draw_command_buffer) { MutexLock transfer_worker_lock(transfer_worker_pool_texture_barriers_mutex); if (!transfer_worker_pool_texture_barriers.is_empty()) { - driver->command_pipeline_barrier(p_draw_command_buffer, RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers); + driver->command_pipeline_barrier(p_draw_command_buffer, 
RDD::PIPELINE_STAGE_COPY_BIT, RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, {}, {}, transfer_worker_pool_texture_barriers, {}); transfer_worker_pool_texture_barriers.clear(); } } @@ -6400,6 +6968,13 @@ void RenderingDevice::_free_internal(RID p_id) { RDG::resource_tracker_free(storage_buffer->draw_tracker); frames[frame].buffers_to_dispose_of.push_back(*storage_buffer); storage_buffer_owner.free(p_id); + } else if (instances_buffer_owner.owns(p_id)) { + InstancesBuffer *instances_buffer = instances_buffer_owner.get_or_null(p_id); + _check_transfer_worker_buffer(&instances_buffer->buffer); + + RDG::resource_tracker_free(instances_buffer->buffer.draw_tracker); + frames[frame].buffers_to_dispose_of.push_back(instances_buffer->buffer); + instances_buffer_owner.free(p_id); } else if (uniform_set_owner.owns(p_id)) { UniformSet *uniform_set = uniform_set_owner.get_or_null(p_id); frames[frame].uniform_sets_to_dispose_of.push_back(*uniform_set); @@ -6416,6 +6991,14 @@ void RenderingDevice::_free_internal(RID p_id) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); frames[frame].compute_pipelines_to_dispose_of.push_back(*pipeline); compute_pipeline_owner.free(p_id); + } else if (acceleration_structure_owner.owns(p_id)) { + AccelerationStructure *acceleration_structure = acceleration_structure_owner.get_or_null(p_id); + frames[frame].acceleration_structures_to_dispose_of.push_back(*acceleration_structure); + acceleration_structure_owner.free(p_id); + } else if (raytracing_pipeline_owner.owns(p_id)) { + RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_id); + frames[frame].raytracing_pipelines_to_dispose_of.push_back(*pipeline); + raytracing_pipeline_owner.free(p_id); } else { #ifdef DEV_ENABLED ERR_PRINT("Attempted to free invalid ID: " + itos(p_id.get_id()) + " " + resource_name); @@ -6459,6 +7042,9 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } else if (storage_buffer_owner.owns(p_id)) { Buffer 
*storage_buffer = storage_buffer_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_BUFFER, storage_buffer->driver_id, p_name); + } else if (instances_buffer_owner.owns(p_id)) { + InstancesBuffer *instances_buffer = instances_buffer_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_BUFFER, instances_buffer->buffer.driver_id, p_name); } else if (uniform_set_owner.owns(p_id)) { UniformSet *uniform_set = uniform_set_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_UNIFORM_SET, uniform_set->driver_id, p_name); @@ -6468,6 +7054,12 @@ void RenderingDevice::set_resource_name(RID p_id, const String &p_name) { } else if (compute_pipeline_owner.owns(p_id)) { ComputePipeline *pipeline = compute_pipeline_owner.get_or_null(p_id); driver->set_object_name(RDD::OBJECT_TYPE_PIPELINE, pipeline->driver_id, p_name); + } else if (acceleration_structure_owner.owns(p_id)) { + AccelerationStructure *acceleration_structure = acceleration_structure_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_ACCELERATION_STRUCTURE, acceleration_structure->driver_id, p_name); + } else if (raytracing_pipeline_owner.owns(p_id)) { + RaytracingPipeline *pipeline = raytracing_pipeline_owner.get_or_null(p_id); + driver->set_object_name(RDD::OBJECT_TYPE_RAYTRACING_PIPELINE, pipeline->driver_id, p_name); } else { ERR_PRINT("Attempted to name invalid ID: " + itos(p_id.get_id())); return; @@ -6585,6 +7177,26 @@ void RenderingDevice::_free_pending_resources(int p_frame) { frames[p_frame].compute_pipelines_to_dispose_of.pop_front(); } + while (frames[p_frame].raytracing_pipelines_to_dispose_of.front()) { + RaytracingPipeline *pipeline = &frames[p_frame].raytracing_pipelines_to_dispose_of.front()->get(); + + driver->raytracing_pipeline_free(pipeline->driver_id); + + frames[p_frame].raytracing_pipelines_to_dispose_of.pop_front(); + } + + // Acceleration structures. 
+ while (frames[p_frame].acceleration_structures_to_dispose_of.front()) { + AccelerationStructure &acceleration_structure = frames[p_frame].acceleration_structures_to_dispose_of.front()->get(); + + if (acceleration_structure.scratch_buffer != RID()) { + free_rid(acceleration_structure.scratch_buffer); + } + driver->acceleration_structure_free(acceleration_structure.driver_id); + + frames[p_frame].acceleration_structures_to_dispose_of.pop_front(); + } + // Uniform sets. while (frames[p_frame].uniform_sets_to_dispose_of.front()) { UniformSet *uniform_set = &frames[p_frame].uniform_sets_to_dispose_of.front()->get(); @@ -6733,6 +7345,10 @@ void RenderingDevice::_end_frame() { ERR_PRINT("Found open compute list at the end of the frame, this should never happen (further compute will likely not work)."); } + if (raytracing_list.active) { + ERR_PRINT("Found open raytracing list at the end of the frame, this should never happen (further raytracing will likely not work)."); + } + // The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use. RDD::CommandBufferID command_buffer = frames[frame].command_buffer; GodotProfileZoneGroupedFirst(_profile_zone, "_submit_transfer_workers"); @@ -7187,6 +7803,7 @@ Error RenderingDevice::initialize(RenderingContextDriver *p_context, DisplayServ draw_list = DrawList(); compute_list = ComputeList(); + raytracing_list = RaytracingList(); bool project_pipeline_cache_enable = GLOBAL_GET("rendering/rendering_device/pipeline_cache/enable"); if (is_main_instance && project_pipeline_cache_enable) { @@ -7312,6 +7929,7 @@ void RenderingDevice::capture_timestamp(const String &p_name) { ERR_FAIL_COND_MSG(draw_list.active && draw_list.state.draw_count > 0, "Capturing timestamps during draw list creation is not allowed. 
Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(compute_list.active && compute_list.state.dispatch_count > 0, "Capturing timestamps during compute list creation is not allowed. Offending timestamp was: " + p_name); + ERR_FAIL_COND_MSG(raytracing_list.active && raytracing_list.state.trace_count > 0, "Capturing timestamps during raytracing list creation is not allowed. Offending timestamp was: " + p_name); ERR_FAIL_COND_MSG(frames[frame].timestamp_count >= max_timestamp_query_elements, vformat("Tried capturing more timestamps than the configured maximum (%d). You can increase this limit in the project settings under 'Debug/Settings' called 'Max Timestamp Query Elements'.", max_timestamp_query_elements)); draw_graph.add_capture_timestamp(frames[frame].timestamp_pool, frames[frame].timestamp_count); @@ -7368,6 +7986,8 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r buffer = texture_buffer_owner.get_or_null(p_rid); } else if (storage_buffer_owner.owns(p_rid)) { buffer = storage_buffer_owner.get_or_null(p_rid); + } else if (instances_buffer_owner.owns(p_rid)) { + buffer = &instances_buffer_owner.get_or_null(p_rid)->buffer; } ERR_FAIL_NULL_V(buffer, 0); @@ -7489,6 +8109,7 @@ void RenderingDevice::finalize() { _free_rids(uniform_set_owner, "UniformSet"); _free_rids(texture_buffer_owner, "TextureBuffer"); _free_rids(storage_buffer_owner, "StorageBuffer"); + _free_rids(instances_buffer_owner, "InstancesBuffer"); _free_rids(uniform_buffer_owner, "UniformBuffer"); _free_rids(shader_owner, "Shader"); _free_rids(index_array_owner, "IndexArray"); @@ -7731,6 +8352,15 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_compute_pipeline_create, DEFVAL(TypedArray())); ClassDB::bind_method(D_METHOD("compute_pipeline_is_valid", "compute_pipeline"), &RenderingDevice::compute_pipeline_is_valid); + 
ClassDB::bind_method(D_METHOD("raytracing_pipeline_create", "shader", "specialization_constants"), &RenderingDevice::_raytracing_pipeline_create, DEFVAL(TypedArray())); + ClassDB::bind_method(D_METHOD("raytracing_pipeline_is_valid", "raytracing_pipeline"), &RenderingDevice::raytracing_pipeline_is_valid); + + ClassDB::bind_method(D_METHOD("blas_create", "vertex_array", "index_array", "geometry_bits", "position_attribute_location"), &RenderingDevice::blas_create, DEFVAL(0), DEFVAL(0)); + ClassDB::bind_method(D_METHOD("tlas_instances_buffer_create", "instance_count", "creation_bits"), &RenderingDevice::tlas_instances_buffer_create, DEFVAL(0)); + ClassDB::bind_method(D_METHOD("tlas_instances_buffer_fill", "instances_buffer", "blases", "transforms"), &RenderingDevice::_tlas_instances_buffer_fill); + ClassDB::bind_method(D_METHOD("tlas_create", "instances_buffer"), &RenderingDevice::tlas_create); + ClassDB::bind_method(D_METHOD("acceleration_structure_build", "acceleration_structure"), &RenderingDevice::acceleration_structure_build); + ClassDB::bind_method(D_METHOD("screen_get_width", "screen"), &RenderingDevice::screen_get_width, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_height", "screen"), &RenderingDevice::screen_get_height, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); ClassDB::bind_method(D_METHOD("screen_get_framebuffer_format", "screen"), &RenderingDevice::screen_get_framebuffer_format, DEFVAL(DisplayServer::MAIN_WINDOW_ID)); @@ -7772,6 +8402,13 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("compute_list_add_barrier", "compute_list"), &RenderingDevice::compute_list_add_barrier); ClassDB::bind_method(D_METHOD("compute_list_end"), &RenderingDevice::compute_list_end); + ClassDB::bind_method(D_METHOD("raytracing_list_begin"), &RenderingDevice::raytracing_list_begin); + ClassDB::bind_method(D_METHOD("raytracing_list_bind_raytracing_pipeline", "raytracing_list", "raytracing_pipeline"), 
&RenderingDevice::raytracing_list_bind_raytracing_pipeline); + ClassDB::bind_method(D_METHOD("raytracing_list_set_push_constant", "raytracing_list", "buffer", "size_bytes"), &RenderingDevice::_raytracing_list_set_push_constant); + ClassDB::bind_method(D_METHOD("raytracing_list_bind_uniform_set", "raytracing_list", "uniform_set", "set_index"), &RenderingDevice::raytracing_list_bind_uniform_set); + ClassDB::bind_method(D_METHOD("raytracing_list_trace_rays", "raytracing_list", "width", "height"), &RenderingDevice::raytracing_list_trace_rays); + ClassDB::bind_method(D_METHOD("raytracing_list_end"), &RenderingDevice::raytracing_list_end); + ClassDB::bind_method(D_METHOD("free_rid", "rid"), &RenderingDevice::free_rid); ClassDB::bind_method(D_METHOD("capture_timestamp", "name"), &RenderingDevice::capture_timestamp); @@ -8176,6 +8813,10 @@ void RenderingDevice::_bind_methods() { BIND_BITFIELD_FLAG(BUFFER_CREATION_AS_STORAGE_BIT); // Not exposed on purpose. This flag is too dangerous to be exposed to regular GD users. //BIND_BITFIELD_FLAG(BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT); + BIND_BITFIELD_FLAG(BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT); + + BIND_BITFIELD_FLAG(ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE); + BIND_BITFIELD_FLAG(ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION); BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER); //for sampling only (sampler GLSL type) BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER_WITH_TEXTURE); // for sampling only); but includes a texture); (samplerXX GLSL type)); first a sampler then a texture @@ -8189,6 +8830,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(UNIFORM_TYPE_INPUT_ATTACHMENT); //used for sub-pass read/write); for mobile mostly BIND_ENUM_CONSTANT(UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC); // Exposed in case a BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT buffer created by C++ makes it into GD users. 
BIND_ENUM_CONSTANT(UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC); // Exposed in case a BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT buffer created by C++ makes it into GD users. + BIND_ENUM_CONSTANT(UNIFORM_TYPE_ACCELERATION_STRUCTURE); //acceleration structure (TLAS)); for raytracing BIND_ENUM_CONSTANT(UNIFORM_TYPE_MAX); BIND_ENUM_CONSTANT(RENDER_PRIMITIVE_POINTS); @@ -8308,12 +8950,22 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE); + BIND_ENUM_CONSTANT(SHADER_STAGE_RAYGEN); + BIND_ENUM_CONSTANT(SHADER_STAGE_ANY_HIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_CLOSEST_HIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MISS); + BIND_ENUM_CONSTANT(SHADER_STAGE_INTERSECTION); BIND_ENUM_CONSTANT(SHADER_STAGE_MAX); BIND_ENUM_CONSTANT(SHADER_STAGE_VERTEX_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_FRAGMENT_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_CONTROL_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_TESSELATION_EVALUATION_BIT); BIND_ENUM_CONSTANT(SHADER_STAGE_COMPUTE_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_RAYGEN_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_ANY_HIT_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_CLOSEST_HIT_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_MISS_BIT); + BIND_ENUM_CONSTANT(SHADER_STAGE_INTERSECTION_BIT); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_GLSL); BIND_ENUM_CONSTANT(SHADER_LANGUAGE_HLSL); @@ -8326,6 +8978,8 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SUPPORTS_METALFX_TEMPORAL); BIND_ENUM_CONSTANT(SUPPORTS_BUFFER_DEVICE_ADDRESS); BIND_ENUM_CONSTANT(SUPPORTS_IMAGE_ATOMIC_32_BIT); + BIND_ENUM_CONSTANT(SUPPORTS_RAY_QUERY); + BIND_ENUM_CONSTANT(SUPPORTS_RAYTRACING_PIPELINE); BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS); BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS); @@ -8637,6 +9291,16 @@ Error RenderingDevice::_buffer_update_bind(RID p_buffer, uint32_t p_offset, uint return buffer_update(p_buffer, p_offset, p_size, 
p_data.ptr()); } +void RenderingDevice::_tlas_instances_buffer_fill(RID p_instances_buffer, const TypedArray &p_blases, const TypedArray &p_transforms) { + Vector blases = Variant(p_blases); + Vector transforms; + transforms.resize(p_transforms.size()); + for (int i = 0; i < p_transforms.size(); i++) { + transforms.write[i] = p_transforms[i]; + } + tlas_instances_buffer_fill(p_instances_buffer, blases, transforms); +} + static Vector _get_spec_constants(const TypedArray &p_constants) { Vector ret; ret.resize(p_constants.size()); @@ -8705,6 +9369,10 @@ RID RenderingDevice::_compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants = TypedArray()) { + return raytracing_pipeline_create(p_shader, _get_spec_constants(p_specialization_constants)); +} + #ifndef DISABLE_DEPRECATED Vector RenderingDevice::_draw_list_begin_split(RID p_framebuffer, uint32_t p_splits, InitialAction p_initial_color_action, FinalAction p_final_color_action, InitialAction p_initial_depth_action, FinalAction p_final_depth_action, const Vector &p_clear_color_values, float p_clear_depth, uint32_t p_clear_stencil, const Rect2 &p_region, const TypedArray &p_storage_textures) { ERR_FAIL_V_MSG(Vector(), "Deprecated. 
Split draw lists are used automatically by RenderingDevice."); @@ -8725,6 +9393,11 @@ void RenderingDevice::_compute_list_set_push_constant(ComputeListID p_list, cons compute_list_set_push_constant(p_list, p_data.ptr(), p_data_size); } +void RenderingDevice::_raytracing_list_set_push_constant(RaytracingListID p_list, const Vector &p_data, uint32_t p_data_size) { + ERR_FAIL_COND(p_data_size > (uint32_t)p_data.size()); + raytracing_list_set_push_constant(p_list, p_data.ptr(), p_data_size); +} + static_assert(ENUM_MEMBERS_EQUAL(RD::CALLBACK_RESOURCE_USAGE_NONE, RDG::RESOURCE_USAGE_NONE)); static_assert(ENUM_MEMBERS_EQUAL(RD::CALLBACK_RESOURCE_USAGE_COPY_FROM, RDG::RESOURCE_USAGE_COPY_FROM)); static_assert(ENUM_MEMBERS_EQUAL(RD::CALLBACK_RESOURCE_USAGE_COPY_TO, RDG::RESOURCE_USAGE_COPY_TO)); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 13dbeaaf4c69..1f8a0cef46b5 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -67,6 +67,7 @@ class RenderingDevice : public RenderingDeviceCommons { public: typedef int64_t DrawListID; typedef int64_t ComputeListID; + typedef int64_t RaytracingListID; typedef void (*InvalidationCallback)(void *); @@ -103,6 +104,7 @@ class RenderingDevice : public RenderingDeviceCommons { ID_TYPE_VERTEX_FORMAT, ID_TYPE_DRAW_LIST, ID_TYPE_COMPUTE_LIST = 4, + ID_TYPE_RAYTRACING_LIST = 5, ID_TYPE_MAX, ID_BASE_SHIFT = 58, // 5 bits for ID types. 
ID_MASK = (ID_BASE_SHIFT - 1), @@ -291,6 +293,9 @@ class RenderingDevice : public RenderingDeviceCommons { CALLBACK_RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ, CALLBACK_RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ, CALLBACK_RESOURCE_USAGE_GENERAL, + CALLBACK_RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT, + CALLBACK_RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ, + CALLBACK_RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE, CALLBACK_RESOURCE_USAGE_MAX }; @@ -841,6 +846,7 @@ class RenderingDevice : public RenderingDeviceCommons { BUFFER_CREATION_DEVICE_ADDRESS_BIT = (1 << 0), BUFFER_CREATION_AS_STORAGE_BIT = (1 << 1), BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT = (1 << 2), + BUFFER_CREATION_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT = (1 << 3), }; enum StorageBufferUsage { @@ -863,6 +869,9 @@ class RenderingDevice : public RenderingDeviceCommons { RID index_array_create(RID p_index_buffer, uint32_t p_index_offset, uint32_t p_index_count); +private: + BitField _creation_to_usage_bits(BitField p_creation_bits); + /****************/ /**** SHADER ****/ /****************/ @@ -1230,6 +1239,17 @@ class RenderingDevice : public RenderingDeviceCommons { RID_Owner compute_pipeline_owner; + struct RaytracingPipeline { + RID shader; + RDD::ShaderID shader_driver_id; + uint32_t shader_layout_hash = 0; + Vector set_formats; + RDD::RaytracingPipelineID driver_id; + uint32_t push_constant_size = 0; + }; + + RID_Owner raytracing_pipeline_owner; + public: RID render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const PipelineRasterizationState &p_rasterization_state, const PipelineMultisampleState &p_multisample_state, const PipelineDepthStencilState &p_depth_stencil_state, const PipelineColorBlendState &p_blend_state, BitField p_dynamic_state_flags = 0, uint32_t p_for_render_pass = 0, const Vector &p_specialization_constants = Vector()); bool render_pipeline_is_valid(RID 
p_pipeline); @@ -1237,6 +1257,9 @@ class RenderingDevice : public RenderingDeviceCommons { RID compute_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); bool compute_pipeline_is_valid(RID p_pipeline); + RID raytracing_pipeline_create(RID p_shader, const Vector &p_specialization_constants = Vector()); + bool raytracing_pipeline_is_valid(RID p_pipeline); + void update_pipeline_cache(bool p_closing = false); private: @@ -1257,6 +1280,45 @@ class RenderingDevice : public RenderingDeviceCommons { FramebufferFormatID screen_get_framebuffer_format(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID) const; Error screen_free(DisplayServer::WindowID p_screen = DisplayServer::MAIN_WINDOW_ID); +private: + /********************************/ + /**** ACCELERATION STRUCTURE ****/ + /********************************/ + + struct InstancesBuffer { + Buffer buffer; + uint32_t instance_count; + Vector blases; + }; + + struct AccelerationStructure { + RDD::AccelerationStructureID driver_id; + RDD::AccelerationStructureType type = RDD::ACCELERATION_STRUCTURE_TYPE_BLAS; + RDG::ResourceTracker *draw_tracker = nullptr; + Vector draw_trackers; + + RID scratch_buffer; + RID vertex_array; + RID index_array; + RID transform_buffer; + RID instances_buffer; + }; + + RID_Owner instances_buffer_owner; + RID_Owner acceleration_structure_owner; + +public: + enum AccelerationStructureGeometryBits { + ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE = (1 << 0), + ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION = (1 << 1), + }; + + RID blas_create(RID p_vertex_array, RID p_index_array, BitField p_geometry_bits = 0, uint32_t p_position_attribute_location = 0); + RID tlas_instances_buffer_create(uint32_t p_instance_count, BitField p_creation_bits = 0); + void tlas_instances_buffer_fill(RID p_buffer, const Vector &p_blases, VectorView p_transforms); + RID tlas_create(RID p_instances_buffer); + Error acceleration_structure_build(RID 
p_acceleration_structure); + /*************************/ /**** DRAW LISTS (II) ****/ /*************************/ @@ -1399,6 +1461,61 @@ class RenderingDevice : public RenderingDeviceCommons { void draw_list_end(); +private: + /**************************/ + /**** RAYTRACING LISTS ****/ + /**************************/ + + struct RaytracingList { + bool active = false; + struct SetState { + uint32_t pipeline_expected_format = 0; + uint32_t uniform_set_format = 0; + RDD::UniformSetID uniform_set_driver_id; + RID uniform_set; + bool bound = false; + }; + + struct State { + SetState sets[MAX_UNIFORM_SETS]; + uint32_t set_count = 0; + RID pipeline; + RDD::RaytracingPipelineID pipeline_driver_id; + RID pipeline_shader; + RDD::ShaderID pipeline_shader_driver_id; + uint32_t pipeline_shader_layout_hash = 0; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE] = {}; + uint32_t push_constant_size = 0; + uint32_t trace_count = 0; + } state; + +#ifdef DEBUG_ENABLED + struct Validation { + bool active = true; // Means command buffer was not closed, so you can keep adding things. + Vector set_formats; + Vector set_bound; + Vector set_rids; + // Last pipeline set values. 
+ bool pipeline_active = false; + RID pipeline_shader; + uint32_t invalid_set_from = 0; + uint32_t pipeline_push_constant_size = 0; + bool pipeline_push_constant_supplied = false; + } validation; +#endif + }; + + RaytracingList raytracing_list; + RaytracingList::State raytracing_list_barrier_state; + +public: + RaytracingListID raytracing_list_begin(); + void raytracing_list_bind_raytracing_pipeline(RaytracingListID p_list, RID p_raytracing_pipeline); + void raytracing_list_bind_uniform_set(RaytracingListID p_list, RID p_uniform_set, uint32_t p_index); + void raytracing_list_set_push_constant(RaytracingListID p_list, const void *p_data, uint32_t p_data_size); + void raytracing_list_trace_rays(RaytracingListID p_list, uint32_t p_width, uint32_t p_height); + void raytracing_list_end(); + private: /***********************/ /**** COMPUTE LISTS ****/ @@ -1560,6 +1677,8 @@ class RenderingDevice : public RenderingDeviceCommons { List uniform_sets_to_dispose_of; List render_pipelines_to_dispose_of; List compute_pipelines_to_dispose_of; + List acceleration_structures_to_dispose_of; + List raytracing_pipelines_to_dispose_of; // Pending asynchronous data transfer for buffers. 
LocalVector download_buffer_staging_buffers; @@ -1776,11 +1895,15 @@ class RenderingDevice : public RenderingDeviceCommons { Error _buffer_update_bind(RID p_buffer, uint32_t p_offset, uint32_t p_size, const Vector &p_data); + void _tlas_instances_buffer_fill(RID p_buffer, const TypedArray &p_blases, const TypedArray &p_transforms); + RID _render_pipeline_create(RID p_shader, FramebufferFormatID p_framebuffer_format, VertexFormatID p_vertex_format, RenderPrimitive p_render_primitive, const Ref &p_rasterization_state, const Ref &p_multisample_state, const Ref &p_depth_stencil_state, const Ref &p_blend_state, BitField p_dynamic_state_flags, uint32_t p_for_render_pass, const TypedArray &p_specialization_constants); RID _compute_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); + RID _raytracing_pipeline_create(RID p_shader, const TypedArray &p_specialization_constants); void _draw_list_set_push_constant(DrawListID p_list, const Vector &p_data, uint32_t p_data_size); void _compute_list_set_push_constant(ComputeListID p_list, const Vector &p_data, uint32_t p_data_size); + void _raytracing_list_set_push_constant(RaytracingListID p_list, const Vector &p_data, uint32_t p_data_size); }; VARIANT_ENUM_CAST(RenderingDevice::DeviceType) @@ -1801,6 +1924,7 @@ VARIANT_ENUM_CAST(RenderingDevice::VertexFrequency) VARIANT_ENUM_CAST(RenderingDevice::IndexBufferFormat) VARIANT_BITFIELD_CAST(RenderingDevice::StorageBufferUsage) VARIANT_BITFIELD_CAST(RenderingDevice::BufferCreationBits) +VARIANT_BITFIELD_CAST(RenderingDevice::AccelerationStructureGeometryBits) VARIANT_ENUM_CAST(RenderingDevice::UniformType) VARIANT_ENUM_CAST(RenderingDevice::RenderPrimitive) VARIANT_ENUM_CAST(RenderingDevice::PolygonCullMode) diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp index 14e38682b943..f170ae116454 100644 --- a/servers/rendering/rendering_device_binds.cpp +++ b/servers/rendering/rendering_device_binds.cpp @@ -41,7 
+41,7 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String Vector lines = p_text.split("\n"); bool reading_versions = false; - bool stage_found[RD::SHADER_STAGE_MAX] = { false, false, false, false, false }; + bool stage_found[RD::SHADER_STAGE_MAX] = {}; RD::ShaderStage stage = RD::SHADER_STAGE_MAX; static const char *stage_str[RD::SHADER_STAGE_MAX] = { "vertex", @@ -49,6 +49,11 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String "tesselation_control", "tesselation_evaluation", "compute", + "raygen", + "any_hit", + "closest_hit", + "miss", + "intersection", }; String stage_code[RD::SHADER_STAGE_MAX]; int stages_found = 0; @@ -193,7 +198,7 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String code = code.replace("VERSION_DEFINES", E.value); String error; #ifdef MODULE_GLSLANG_ENABLED - Vector spirv = compile_glslang_shader(RD::ShaderStage(i), ShaderIncludeDB::parse_include_files(code), RD::SHADER_LANGUAGE_VULKAN_VERSION_1_1, RD::SHADER_SPIRV_VERSION_1_3, &error); + Vector spirv = compile_glslang_shader(RD::ShaderStage(i), ShaderIncludeDB::parse_include_files(code), RD::SHADER_LANGUAGE_VULKAN_VERSION_1_1, RD::SHADER_SPIRV_VERSION_1_4, &error); bytecode->set_stage_bytecode(RD::ShaderStage(i), spirv); #else error = "Shader compilation is not supported because glslang was not enabled."; diff --git a/servers/rendering/rendering_device_binds.h b/servers/rendering/rendering_device_binds.h index 9d8c81e3a88e..f19a8f64ec24 100644 --- a/servers/rendering/rendering_device_binds.h +++ b/servers/rendering/rendering_device_binds.h @@ -276,6 +276,11 @@ class RDShaderSource : public RefCounted { ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_control"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_tesselation_evaluation"), "set_stage_source", "get_stage_source", 
RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_compute"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_raygen"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_any_hit"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_ANY_HIT); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_closest_hit"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_CLOSEST_HIT); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_miss"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "source_intersection"), "set_stage_source", "get_stage_source", RD::SHADER_STAGE_INTERSECTION); ADD_GROUP("Syntax", "source_"); ADD_PROPERTY(PropertyInfo(Variant::INT, "language", PROPERTY_HINT_RANGE, "GLSL,HLSL"), "set_language", "get_language"); } @@ -335,12 +340,22 @@ class RDShaderSPIRV : public Resource { ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_control"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_tesselation_evaluation"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_compute"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_raygen"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_any_hit"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_ANY_HIT); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_closest_hit"), "set_stage_bytecode", "get_stage_bytecode", 
RD::SHADER_STAGE_CLOSEST_HIT); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_miss"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "bytecode_intersection"), "set_stage_bytecode", "get_stage_bytecode", RD::SHADER_STAGE_INTERSECTION); ADD_GROUP("Compile Error", "compile_error_"); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_vertex"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_VERTEX); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_fragment"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_FRAGMENT); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_control"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_CONTROL); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_tesselation_evaluation"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_TESSELATION_EVALUATION); ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_compute"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_COMPUTE); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_raygen"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_RAYGEN); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_any_hit"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_ANY_HIT); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_closest_hit"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_CLOSEST_HIT); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_miss"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_MISS); + ADD_PROPERTYI(PropertyInfo(Variant::STRING, "compile_error_intersection"), "set_stage_compile_error", "get_stage_compile_error", RD::SHADER_STAGE_INTERSECTION); } }; diff --git 
a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 7948f85dc617..7d5197116f62 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -586,12 +586,22 @@ class RenderingDeviceCommons : public Object { SHADER_STAGE_TESSELATION_CONTROL, SHADER_STAGE_TESSELATION_EVALUATION, SHADER_STAGE_COMPUTE, + SHADER_STAGE_RAYGEN, + SHADER_STAGE_ANY_HIT, + SHADER_STAGE_CLOSEST_HIT, + SHADER_STAGE_MISS, + SHADER_STAGE_INTERSECTION, SHADER_STAGE_MAX, SHADER_STAGE_VERTEX_BIT = (1 << SHADER_STAGE_VERTEX), SHADER_STAGE_FRAGMENT_BIT = (1 << SHADER_STAGE_FRAGMENT), SHADER_STAGE_TESSELATION_CONTROL_BIT = (1 << SHADER_STAGE_TESSELATION_CONTROL), SHADER_STAGE_TESSELATION_EVALUATION_BIT = (1 << SHADER_STAGE_TESSELATION_EVALUATION), SHADER_STAGE_COMPUTE_BIT = (1 << SHADER_STAGE_COMPUTE), + SHADER_STAGE_RAYGEN_BIT = (1 << SHADER_STAGE_RAYGEN), + SHADER_STAGE_ANY_HIT_BIT = (1 << SHADER_STAGE_ANY_HIT), + SHADER_STAGE_CLOSEST_HIT_BIT = (1 << SHADER_STAGE_CLOSEST_HIT), + SHADER_STAGE_MISS_BIT = (1 << SHADER_STAGE_MISS), + SHADER_STAGE_INTERSECTION_BIT = (1 << SHADER_STAGE_INTERSECTION), }; enum ShaderLanguage { @@ -644,6 +654,7 @@ class RenderingDeviceCommons : public Object { UNIFORM_TYPE_INPUT_ATTACHMENT, // Used for sub-pass read/write, for mobile mostly. UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC, // Same as UNIFORM but created with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC, // Same as STORAGE but created with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. + UNIFORM_TYPE_ACCELERATION_STRUCTURE, // Bounding Volume Hierarchy (Top + Bottom Level acceleration structures), for raytracing only. UNIFORM_TYPE_MAX }; @@ -673,6 +684,13 @@ class RenderingDeviceCommons : public Object { // ----- PIPELINE ----- + // Rendering Shader Container expects this type to be 4 bytes for proper alignment with the shaders. 
+ enum PipelineType : uint32_t { + PIPELINE_TYPE_RASTERIZATION, + PIPELINE_TYPE_COMPUTE, + PIPELINE_TYPE_RAYTRACING, + }; + enum RenderPrimitive { RENDER_PRIMITIVE_POINTS, RENDER_PRIMITIVE_LINES, @@ -974,6 +992,8 @@ class RenderingDeviceCommons : public Object { SUPPORTS_VULKAN_MEMORY_MODEL, SUPPORTS_FRAMEBUFFER_DEPTH_RESOLVE, SUPPORTS_POINT_SIZE, + SUPPORTS_RAY_QUERY, + SUPPORTS_RAYTRACING_PIPELINE, }; enum SubgroupOperations { @@ -1081,7 +1101,7 @@ class RenderingDeviceCommons : public Object { struct ShaderReflection { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; - bool is_compute = false; + PipelineType pipeline_type = PIPELINE_TYPE_RASTERIZATION; bool has_multiview = false; bool has_dynamic_buffers = false; uint32_t compute_local_size[3] = {}; diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 1e1ffc8a81a5..890517c33e0a 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -140,6 +140,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { DEFINE_ID(QueryPool); DEFINE_ID(Fence); DEFINE_ID(Semaphore); + DEFINE_ID(AccelerationStructure); + DEFINE_ID(RaytracingPipeline); public: /*****************/ @@ -170,7 +172,10 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BUFFER_USAGE_INDEX_BIT = (1 << 6), BUFFER_USAGE_VERTEX_BIT = (1 << 7), BUFFER_USAGE_INDIRECT_BIT = (1 << 8), + BUFFER_USAGE_SHADER_BINDING_TABLE_BIT = (1 << 10), BUFFER_USAGE_DEVICE_ADDRESS_BIT = (1 << 17), + BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT = (1 << 19), + BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT = (1 << 20), // There are no Vulkan-equivalent. Try to use unused/unclaimed bits. 
BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT = (1 << 31), }; @@ -325,8 +330,10 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { PIPELINE_STAGE_ALL_GRAPHICS_BIT = (1 << 15), PIPELINE_STAGE_ALL_COMMANDS_BIT = (1 << 16), PIPELINE_STAGE_CLEAR_STORAGE_BIT = (1 << 17), + PIPELINE_STAGE_RAY_TRACING_SHADER_BIT = (1 << 21), PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT = (1 << 22), PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT = (1 << 23), + PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT = (1 << 25), }; enum BarrierAccessBits { @@ -347,6 +354,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { BARRIER_ACCESS_HOST_WRITE_BIT = (1 << 14), BARRIER_ACCESS_MEMORY_READ_BIT = (1 << 15), BARRIER_ACCESS_MEMORY_WRITE_BIT = (1 << 16), + BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT = (1 << 21), + BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT = (1 << 22), BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT = (1 << 23), BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT = (1 << 24), BARRIER_ACCESS_RESOLVE_READ_BIT = (1 << 25), @@ -377,13 +386,22 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { TextureSubresourceRange subresources; }; + struct AccelerationStructureBarrier { + AccelerationStructureID acceleration_structure; + BitField src_access; + BitField dst_access; + uint64_t offset = 0; + uint64_t size = 0; + }; + virtual void command_pipeline_barrier( CommandBufferID p_cmd_buffer, BitField p_src_stages, BitField p_dst_stages, VectorView p_memory_barriers, VectorView p_buffer_barriers, - VectorView p_texture_barriers) = 0; + VectorView p_texture_barriers, + VectorView p_acceleration_structure_barriers) = 0; /****************/ /**** FENCES ****/ @@ -715,6 +733,41 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { virtual PipelineID compute_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) = 0; + /********************/ + /**** RAYTRACING ****/ + /********************/ + + // ----- ACCELERATION 
STRUCTURE ----- + + enum AccelerationStructureType { + ACCELERATION_STRUCTURE_TYPE_BLAS, + ACCELERATION_STRUCTURE_TYPE_TLAS, + }; + + enum AccelerationStructureGeometryBits { + ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE = 1 << 0, + ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION = 1 << 1, + }; + + virtual AccelerationStructureID blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset, uint32_t p_index_count, BitField p_geometry_bits) = 0; + virtual uint32_t tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) = 0; + virtual void tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView p_blases, VectorView p_transforms) = 0; + virtual AccelerationStructureID tlas_create(BufferID p_instances_buffer) = 0; + virtual void acceleration_structure_free(AccelerationStructureID p_acceleration_structure) = 0; + virtual uint32_t acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) = 0; + + // ----- PIPELINE ----- + + virtual RaytracingPipelineID raytracing_pipeline_create(ShaderID p_shader, VectorView p_specialization_constants) = 0; + virtual void raytracing_pipeline_free(RaytracingPipelineID p_pipeline) = 0; + + // ----- COMMANDS ----- + + virtual void command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) = 0; + virtual void command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) = 0; + virtual void command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) = 0; + virtual void command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) = 0; + /******************/ /**** CALLBACK ****/ 
/******************/ @@ -767,6 +820,8 @@ class RenderingDeviceDriver : public RenderingDeviceCommons { OBJECT_TYPE_SHADER, OBJECT_TYPE_UNIFORM_SET, OBJECT_TYPE_PIPELINE, + OBJECT_TYPE_ACCELERATION_STRUCTURE, + OBJECT_TYPE_RAYTRACING_PIPELINE, }; struct MultiviewCapabilities { diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index 8d8c2a0d4dea..530c90214921 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -105,6 +105,8 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_STORAGE_IMAGE_READ: case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ: case RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ: return false; case RESOURCE_USAGE_COPY_TO: case RESOURCE_USAGE_RESOLVE_TO: @@ -114,6 +116,7 @@ bool RenderingDeviceGraph::_is_write_usage(ResourceUsage p_usage) { case RESOURCE_USAGE_ATTACHMENT_COLOR_READ_WRITE: case RESOURCE_USAGE_ATTACHMENT_DEPTH_STENCIL_READ_WRITE: case RESOURCE_USAGE_GENERAL: + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE: return true; default: DEV_ASSERT(false && "Invalid resource tracker usage."); @@ -173,6 +176,12 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage return RDD::BARRIER_ACCESS_UNIFORM_READ_BIT; case RESOURCE_USAGE_INDIRECT_BUFFER_READ: return RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT; + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT: + // Acceleration structure build inputs can be either storage buffers with vertices, indices, transforms, or + // other acceleration structures (BLAS) + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_COPY_READ_BIT | RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT); + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ: + return RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT; 
case RESOURCE_USAGE_STORAGE_BUFFER_READ: case RESOURCE_USAGE_STORAGE_IMAGE_READ: case RESOURCE_USAGE_TEXTURE_BUFFER_READ: @@ -196,6 +205,8 @@ RDD::BarrierAccessBits RenderingDeviceGraph::_usage_to_access_bits(ResourceUsage return RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT; case RESOURCE_USAGE_GENERAL: return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT); + case RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE: + return RDD::BarrierAccessBits(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT | RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT); default: DEV_ASSERT(false && "Invalid usage."); return RDD::BarrierAccessBits(0); @@ -351,6 +362,12 @@ void RenderingDeviceGraph::_check_discardable_attachment_dependency(ResourceTrac } } +RenderingDeviceGraph::RaytracingListInstruction *RenderingDeviceGraph::_allocate_raytracing_list_instruction(uint32_t p_instruction_size) { + uint32_t raytracing_list_data_offset = raytracing_instruction_list.data.size(); + raytracing_instruction_list.data.resize(raytracing_list_data_offset + p_instruction_size); + return reinterpret_cast(&raytracing_instruction_list.data[raytracing_list_data_offset]); +} + void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) { // Assign the next stages derived from the stages the command requires first. r_command->next_stages = r_command->self_stages; @@ -568,6 +585,11 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr // Memory barriers are pushed regardless of buffer barriers being used or not. 
r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; + } else if (resource_tracker->acceleration_structure_driver_id.id != 0) { + // Make sure the acceleration structure has been built before accessing it from raytracing shaders. + _add_acceleration_structure_barrier_to_command(resource_tracker->acceleration_structure_driver_id, resource_tracker->usage_access, new_usage_access, command_acceleration_structure_barriers, r_command->acceleration_structure_barrier_index, r_command->acceleration_structure_barrier_count); + r_command->memory_barrier.src_access = r_command->memory_barrier.src_access | resource_tracker->usage_access; + r_command->memory_barrier.dst_access = r_command->memory_barrier.dst_access | new_usage_access; } else { DEV_ASSERT(false && "Resource tracker does not contain a valid buffer or texture ID."); } @@ -758,6 +780,66 @@ void RenderingDeviceGraph::_add_buffer_barrier_to_command(RDD::BufferID p_buffer } #endif +void RenderingDeviceGraph::_add_acceleration_structure_barrier_to_command(RDD::AccelerationStructureID p_acceleration_structure_id, BitField p_src_access, BitField p_dst_access, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count) { + if (!driver_honors_barriers) { + return; + } + + if (r_barrier_index < 0) { + r_barrier_index = r_barrier_vector.size(); + } + + RDD::AccelerationStructureBarrier accel_barrier; + accel_barrier.acceleration_structure = p_acceleration_structure_id; + accel_barrier.src_access = p_src_access; + accel_barrier.dst_access = p_dst_access; + accel_barrier.offset = 0; + accel_barrier.size = RDD::BUFFER_WHOLE_SIZE; + r_barrier_vector.push_back(accel_barrier); + r_barrier_count++; +} + +void RenderingDeviceGraph::_run_raytracing_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t 
p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(RaytracingListInstruction)) <= p_instruction_data_size); + + const RaytracingListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case RaytracingListInstruction::TYPE_BIND_PIPELINE: { + const RaytracingListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + driver->command_bind_raytracing_pipeline(p_command_buffer, bind_pipeline_instruction->pipeline); + instruction_data_cursor += sizeof(RaytracingListBindPipelineInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_UNIFORM_SET: { + const RaytracingListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + driver->command_bind_raytracing_uniform_set(p_command_buffer, bind_uniform_set_instruction->uniform_set, bind_uniform_set_instruction->shader, bind_uniform_set_instruction->set_index); + instruction_data_cursor += sizeof(RaytracingListBindUniformSetInstruction); + } break; + case RaytracingListInstruction::TYPE_TRACE_RAYS: { + const RaytracingListTraceRaysInstruction *trace_rays_instruction = reinterpret_cast(instruction); + driver->command_trace_rays(p_command_buffer, trace_rays_instruction->width, trace_rays_instruction->height); + instruction_data_cursor += sizeof(RaytracingListTraceRaysInstruction); + } break; + case RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT: { + const RaytracingListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + const VectorView push_constant_data_view(reinterpret_cast(set_push_constant_instruction->data()), set_push_constant_instruction->size / sizeof(uint32_t)); + driver->command_bind_push_constants(p_command_buffer, set_push_constant_instruction->shader, 0, push_constant_data_view); + 
instruction_data_cursor += sizeof(RaytracingListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const RaytracingListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + driver->command_uniform_set_prepare_for_use(p_command_buffer, uniform_set_prepare_for_use_instruction->uniform_set, uniform_set_prepare_for_use_instruction->shader, uniform_set_prepare_for_use_instruction->set_index); + instruction_data_cursor += sizeof(RaytracingListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown raytracing list instruction type."); + return; + } + } +} + void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -1004,6 +1086,10 @@ void RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC _run_label_command_change(r_command_buffer, command->label_index, p_level, false, true, &p_sorted_commands[i], p_sorted_commands_count - i, r_current_label_index, r_current_label_level); switch (command->type) { + case RecordedCommand::TYPE_ACCELERATION_STRUCTURE_BUILD: { + const RecordedAccelerationStructureBuildCommand *as_build_command = reinterpret_cast(command); + driver->command_build_acceleration_structure(r_command_buffer, as_build_command->acceleration_structure, as_build_command->scratch_buffer); + } break; case RecordedCommand::TYPE_BUFFER_CLEAR: { const RecordedBufferClearCommand *buffer_clear_command = reinterpret_cast(command); driver->command_clear_buffer(r_command_buffer, buffer_clear_command->buffer, buffer_clear_command->offset, buffer_clear_command->size); @@ -1027,6 +1113,10 @@ void 
RenderingDeviceGraph::_run_render_commands(int32_t p_level, const RecordedC const RecordedDriverCallbackCommand *driver_callback_command = reinterpret_cast(command); driver_callback_command->callback(driver, r_command_buffer, driver_callback_command->userdata); } break; + case RecordedCommand::TYPE_RAYTRACING_LIST: { + const RecordedRaytracingListCommand *raytracing_list_command = reinterpret_cast(command); + _run_raytracing_list_command(r_command_buffer, raytracing_list_command->instruction_data(), raytracing_list_command->instruction_data_size); + } break; case RecordedCommand::TYPE_COMPUTE_LIST: { if (device.workarounds.avoid_compute_after_draw && workarounds_state.draw_list_found) { // Avoid compute after draw workaround. Refer to the comment that enables this in the Vulkan driver for more information. @@ -1313,6 +1403,12 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe barrier_group.buffer_barriers.push_back(recorded_barrier); } #endif + + // Gather acceleration structure barriers. + for (int32_t j = 0; j < command->acceleration_structure_barrier_count; j++) { + const RDD::AccelerationStructureBarrier &recorded_barrier = command_acceleration_structure_barriers[command->acceleration_structure_barrier_index + j]; + barrier_group.acceleration_structure_barriers.push_back(recorded_barrier); + } } if (p_full_memory_barrier) { @@ -1329,7 +1425,8 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe #else const bool are_buffer_barriers_empty = true; #endif - if (is_memory_barrier_empty && are_texture_barriers_empty && are_buffer_barriers_empty) { + const bool are_acceleration_structure_barriers_empty = barrier_group.acceleration_structure_barriers.is_empty(); + if (is_memory_barrier_empty && are_texture_barriers_empty && are_buffer_barriers_empty && are_acceleration_structure_barriers_empty) { // Commands don't require synchronization. 
return; } @@ -1341,12 +1438,13 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe #else const VectorView buffer_barriers = VectorView(); #endif + const VectorView acceleration_structure_barriers = !are_acceleration_structure_barriers_empty ? barrier_group.acceleration_structure_barriers : VectorView(); - driver->command_pipeline_barrier(p_command_buffer, barrier_group.src_stages, barrier_group.dst_stages, memory_barriers, buffer_barriers, texture_barriers); + driver->command_pipeline_barrier(p_command_buffer, barrier_group.src_stages, barrier_group.dst_stages, memory_barriers, buffer_barriers, texture_barriers, acceleration_structure_barriers); bool separate_texture_barriers = !barrier_group.normalization_barriers.is_empty() && !barrier_group.transition_barriers.is_empty(); if (separate_texture_barriers) { - driver->command_pipeline_barrier(p_command_buffer, barrier_group.src_stages, barrier_group.dst_stages, VectorView(), VectorView(), barrier_group.transition_barriers); + driver->command_pipeline_barrier(p_command_buffer, barrier_group.src_stages, barrier_group.dst_stages, VectorView(), VectorView(), barrier_group.transition_barriers, VectorView()); } } @@ -1525,6 +1623,46 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u } } +void RenderingDeviceGraph::_print_raytracing_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { + uint32_t instruction_data_cursor = 0; + while (instruction_data_cursor < p_instruction_data_size) { + DEV_ASSERT((instruction_data_cursor + sizeof(RaytracingListInstruction)) <= p_instruction_data_size); + + const RaytracingListInstruction *instruction = reinterpret_cast(&p_instruction_data[instruction_data_cursor]); + switch (instruction->type) { + case RaytracingListInstruction::TYPE_BIND_PIPELINE: { + const RaytracingListBindPipelineInstruction *bind_pipeline_instruction = reinterpret_cast(instruction); + print_line("\tBIND PIPELINE ID", 
itos(bind_pipeline_instruction->pipeline.id)); + instruction_data_cursor += sizeof(RaytracingListBindPipelineInstruction); + } break; + case RaytracingListInstruction::TYPE_BIND_UNIFORM_SET: { + const RaytracingListBindUniformSetInstruction *bind_uniform_set_instruction = reinterpret_cast(instruction); + print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_set_instruction->uniform_set.id), "SHADER ID", itos(bind_uniform_set_instruction->shader.id)); + instruction_data_cursor += sizeof(RaytracingListBindUniformSetInstruction); + } break; + case RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT: { + const RaytracingListSetPushConstantInstruction *set_push_constant_instruction = reinterpret_cast(instruction); + print_line("\tSET PUSH CONSTANT SIZE", set_push_constant_instruction->size); + instruction_data_cursor += sizeof(RaytracingListSetPushConstantInstruction); + instruction_data_cursor += set_push_constant_instruction->size; + } break; + case RaytracingListInstruction::TYPE_TRACE_RAYS: { + const RaytracingListTraceRaysInstruction *trace_rays_instruction = reinterpret_cast(instruction); + print_line("\tTRACE RAYS WIDTH", itos(trace_rays_instruction->width), "HEIGHT", itos(trace_rays_instruction->height)); + instruction_data_cursor += sizeof(RaytracingListTraceRaysInstruction); + } break; + case RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE: { + const RaytracingListUniformSetPrepareForUseInstruction *uniform_set_prepare_for_use_instruction = reinterpret_cast(instruction); + print_line("\tUNIFORM SET PREPARE FOR USE ID", itos(uniform_set_prepare_for_use_instruction->uniform_set.id), "SHADER ID", itos(uniform_set_prepare_for_use_instruction->shader.id), "INDEX", itos(uniform_set_prepare_for_use_instruction->set_index)); + instruction_data_cursor += sizeof(RaytracingListUniformSetPrepareForUseInstruction); + } break; + default: + DEV_ASSERT(false && "Unknown raytracing list instruction type."); + return; + } + } +} + void 
RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) { uint32_t instruction_data_cursor = 0; while (instruction_data_cursor < p_instruction_data_size) { @@ -1623,6 +1761,7 @@ void RenderingDeviceGraph::begin() { command_normalization_barriers.clear(); command_transition_barriers.clear(); command_buffer_barriers.clear(); + command_acceleration_structure_barriers.clear(); command_label_chars.clear(); command_label_colors.clear(); command_label_offsets.clear(); @@ -1645,6 +1784,33 @@ void RenderingDeviceGraph::begin() { #endif } +void RenderingDeviceGraph::add_acceleration_structure_build(RDD::AccelerationStructureID p_acceleration_structure, RDD::BufferID p_scratch_buffer, ResourceTracker *p_dst_tracker, VectorView p_src_trackers) { + int32_t command_index; + RecordedAccelerationStructureBuildCommand *command = static_cast(_allocate_command(sizeof(RecordedAccelerationStructureBuildCommand), command_index)); + command->type = RecordedCommand::TYPE_ACCELERATION_STRUCTURE_BUILD; + command->self_stages = RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT; + command->acceleration_structure = p_acceleration_structure; + command->scratch_buffer = p_scratch_buffer; + + thread_local LocalVector trackers; + thread_local LocalVector usages; + + // Sources and destination. 
+ uint32_t resource_count = p_src_trackers.size() + 1; + trackers.resize(resource_count); + usages.resize(resource_count); + + for (uint32_t i = 0; i < p_src_trackers.size(); ++i) { + trackers[i] = p_src_trackers[i]; + usages[i] = RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT; + } + + trackers[resource_count - 1] = p_dst_tracker; + usages[resource_count - 1] = RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE; + + _add_command_to_graph(trackers.ptr(), usages.ptr(), usages.size(), command_index, command); +} + void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size) { DEV_ASSERT(p_dst_tracker != nullptr); @@ -1727,6 +1893,88 @@ void RenderingDeviceGraph::add_driver_callback(RDD::DriverCallback p_callback, v _add_command_to_graph((ResourceTracker **)p_trackers.ptr(), (ResourceUsage *)p_usages.ptr(), p_trackers.size(), command_index, command); } +void RenderingDeviceGraph::add_raytracing_list_begin() { + raytracing_instruction_list.clear(); + raytracing_instruction_list.index++; +} + +void RenderingDeviceGraph::add_raytracing_list_bind_pipeline(RDD::RaytracingPipelineID p_pipeline) { + RaytracingListBindPipelineInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBindPipelineInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BIND_PIPELINE; + instruction->pipeline = p_pipeline; + raytracing_instruction_list.stages.set_flag(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT); +} + +void RenderingDeviceGraph::add_raytracing_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + RaytracingListBindUniformSetInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListBindUniformSetInstruction))); + instruction->type = RaytracingListInstruction::TYPE_BIND_UNIFORM_SET; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + 
instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_raytracing_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size) { + uint32_t instruction_size = sizeof(RaytracingListSetPushConstantInstruction) + p_data_size; + RaytracingListSetPushConstantInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(instruction_size)); + instruction->type = RaytracingListInstruction::TYPE_SET_PUSH_CONSTANT; + instruction->size = p_data_size; + instruction->shader = p_shader; + memcpy(instruction->data(), p_data, p_data_size); +} + +void RenderingDeviceGraph::add_raytracing_list_trace_rays(uint32_t p_width, uint32_t p_height) { + RaytracingListTraceRaysInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListTraceRaysInstruction))); + instruction->type = RaytracingListInstruction::TYPE_TRACE_RAYS; + instruction->width = p_width; + instruction->height = p_height; +} + +void RenderingDeviceGraph::add_raytracing_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index) { + RaytracingListUniformSetPrepareForUseInstruction *instruction = reinterpret_cast(_allocate_raytracing_list_instruction(sizeof(RaytracingListUniformSetPrepareForUseInstruction))); + instruction->type = RaytracingListInstruction::TYPE_UNIFORM_SET_PREPARE_FOR_USE; + instruction->shader = p_shader; + instruction->uniform_set = p_uniform_set; + instruction->set_index = set_index; +} + +void RenderingDeviceGraph::add_raytracing_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage) { + DEV_ASSERT(p_tracker != nullptr); + + p_tracker->reset_if_outdated(tracking_frame); + + if (p_tracker->raytracing_list_index != raytracing_instruction_list.index) { + raytracing_instruction_list.command_trackers.push_back(p_tracker); + raytracing_instruction_list.command_tracker_usages.push_back(p_usage); + p_tracker->raytracing_list_index = 
raytracing_instruction_list.index; + p_tracker->raytracing_list_usage = p_usage; + } +#ifdef DEV_ENABLED + else if (p_tracker->raytracing_list_usage != p_usage) { + ERR_FAIL_MSG(vformat("Tracker can't have more than one type of usage in the same raytracing list. Raytracing list usage is %d and the requested usage is %d.", p_tracker->raytracing_list_usage, p_usage)); + } +#endif +} + +void RenderingDeviceGraph::add_raytracing_list_usages(VectorView p_trackers, VectorView p_usages) { + DEV_ASSERT(p_trackers.size() == p_usages.size()); + + for (uint32_t i = 0; i < p_trackers.size(); i++) { + add_raytracing_list_usage(p_trackers[i], p_usages[i]); + } +} + +void RenderingDeviceGraph::add_raytracing_list_end() { + int32_t command_index; + uint32_t instruction_data_size = raytracing_instruction_list.data.size(); + uint32_t command_size = sizeof(RecordedRaytracingListCommand) + instruction_data_size; + RecordedRaytracingListCommand *command = static_cast(_allocate_command(command_size, command_index)); + command->type = RecordedCommand::TYPE_RAYTRACING_LIST; + command->self_stages = raytracing_instruction_list.stages; + command->instruction_data_size = instruction_data_size; + memcpy(command->instruction_data(), raytracing_instruction_list.data.ptr(), instruction_data_size); + _add_command_to_graph(raytracing_instruction_list.command_trackers.ptr(), raytracing_instruction_list.command_tracker_usages.ptr(), raytracing_instruction_list.command_trackers.size(), command_index, command); +} + void RenderingDeviceGraph::add_compute_list_begin(RDD::BreadcrumbMarker p_phase, uint32_t p_breadcrumb_data) { compute_instruction_list.clear(); #if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 246c64c067b1..9318d6527702 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -44,6 +44,19 @@ class RenderingDeviceGraph { public: + 
struct RaytracingListInstruction { + enum Type { + TYPE_NONE, + TYPE_BIND_PIPELINE, + TYPE_BIND_UNIFORM_SET, + TYPE_SET_PUSH_CONSTANT, + TYPE_TRACE_RAYS, + TYPE_UNIFORM_SET_PREPARE_FOR_USE, + }; + + Type type = TYPE_NONE; + }; + struct ComputeListInstruction { enum Type { TYPE_NONE, @@ -86,11 +99,13 @@ class RenderingDeviceGraph { struct RecordedCommand { enum Type { TYPE_NONE, + TYPE_ACCELERATION_STRUCTURE_BUILD, TYPE_BUFFER_CLEAR, TYPE_BUFFER_COPY, TYPE_BUFFER_GET_DATA, TYPE_BUFFER_UPDATE, TYPE_COMPUTE_LIST, + TYPE_RAYTRACING_LIST, TYPE_DRAW_LIST, TYPE_TEXTURE_CLEAR_COLOR, TYPE_TEXTURE_CLEAR_DEPTH_STENCIL, @@ -114,6 +129,8 @@ class RenderingDeviceGraph { int32_t buffer_barrier_index = -1; int32_t buffer_barrier_count = 0; #endif + int32_t acceleration_structure_barrier_index = -1; + int32_t acceleration_structure_barrier_count = 0; int32_t label_index = -1; BitField previous_stages = {}; BitField next_stages = {}; @@ -152,6 +169,9 @@ class RenderingDeviceGraph { RESOURCE_USAGE_ATTACHMENT_FRAGMENT_SHADING_RATE_READ, RESOURCE_USAGE_ATTACHMENT_FRAGMENT_DENSITY_MAP_READ, RESOURCE_USAGE_GENERAL, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ, + RESOURCE_USAGE_ACCELERATION_STRUCTURE_READ_WRITE, RESOURCE_USAGE_MAX }; @@ -166,11 +186,14 @@ class RenderingDeviceGraph { int32_t draw_list_index = -1; ResourceUsage draw_list_usage = RESOURCE_USAGE_NONE; int32_t compute_list_index = -1; + int32_t raytracing_list_index = -1; ResourceUsage compute_list_usage = RESOURCE_USAGE_NONE; + ResourceUsage raytracing_list_usage = RESOURCE_USAGE_NONE; ResourceUsage usage = RESOURCE_USAGE_NONE; BitField usage_access = {}; RDD::BufferID buffer_driver_id; RDD::TextureID texture_driver_id; + RDD::AccelerationStructureID acceleration_structure_driver_id; RDD::TextureSubresourceRange texture_subresources; Size2i texture_size; uint32_t texture_usage = 0; @@ -193,6 +216,7 @@ class RenderingDeviceGraph { write_command_or_list_index = -1; 
draw_list_index = -1; compute_list_index = -1; + raytracing_list_index = -1; texture_slice_command_index = -1; write_command_list_enabled = false; } @@ -260,6 +284,10 @@ class RenderingDeviceGraph { #endif }; + struct RaytracingInstructionList : InstructionList { + // No extra contents. + }; + struct DrawInstructionList : InstructionList { FramebufferCache *framebuffer_cache = nullptr; RDD::RenderPassID render_pass; @@ -310,6 +338,11 @@ class RenderingDeviceGraph { bool partial_coverage = false; }; + struct RecordedAccelerationStructureBuildCommand : RecordedCommand { + RDD::AccelerationStructureID acceleration_structure; + RDD::BufferID scratch_buffer; + }; + struct RecordedBufferClearCommand : RecordedCommand { RDD::BufferID buffer; uint32_t offset = 0; @@ -346,6 +379,18 @@ class RenderingDeviceGraph { void *userdata = nullptr; }; + struct RecordedRaytracingListCommand : RecordedCommand { + uint32_t instruction_data_size = 0; + + _FORCE_INLINE_ uint8_t *instruction_data() { + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *instruction_data() const { + return reinterpret_cast(&this[1]); + } + }; + struct RecordedComputeListCommand : RecordedCommand { uint32_t instruction_data_size = 0; uint32_t breadcrumb = 0; @@ -622,6 +667,45 @@ class RenderingDeviceGraph { uint32_t set_index = 0; }; + struct RaytracingListBuildAccelerationStructureInstruction : RaytracingListInstruction { + RDD::AccelerationStructureID acceleration_structure; + RDD::AccelerationStructureType acceleration_structure_type; + }; + + struct RaytracingListBindPipelineInstruction : RaytracingListInstruction { + RDD::RaytracingPipelineID pipeline; + }; + + struct RaytracingListBindUniformSetInstruction : RaytracingListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + + struct RaytracingListSetPushConstantInstruction : RaytracingListInstruction { + uint32_t size = 0; + RDD::ShaderID shader; + + _FORCE_INLINE_ uint8_t *data() 
{ + return reinterpret_cast(&this[1]); + } + + _FORCE_INLINE_ const uint8_t *data() const { + return reinterpret_cast(&this[1]); + } + }; + + struct RaytracingListTraceRaysInstruction : RaytracingListInstruction { + uint32_t width = 0; + uint32_t height = 0; + }; + + struct RaytracingListUniformSetPrepareForUseInstruction : RaytracingListInstruction { + RDD::UniformSetID uniform_set; + RDD::ShaderID shader; + uint32_t set_index = 0; + }; + struct ComputeListBindPipelineInstruction : ComputeListInstruction { RDD::PipelineID pipeline; }; @@ -680,6 +764,7 @@ class RenderingDeviceGraph { #if USE_BUFFER_BARRIERS LocalVector buffer_barriers; #endif + LocalVector acceleration_structure_barriers; void clear() { src_stages.clear(); @@ -691,6 +776,7 @@ class RenderingDeviceGraph { #if USE_BUFFER_BARRIERS buffer_barriers.clear(); #endif + acceleration_structure_barriers.clear(); } }; @@ -717,12 +803,14 @@ class RenderingDeviceGraph { LocalVector command_normalization_barriers; LocalVector command_transition_barriers; LocalVector command_buffer_barriers; + LocalVector command_acceleration_structure_barriers; LocalVector command_label_chars; LocalVector command_label_colors; LocalVector command_label_offsets; int32_t command_label_index = -1; DrawInstructionList draw_instruction_list; ComputeInstructionList compute_instruction_list; + RaytracingInstructionList raytracing_instruction_list; uint32_t command_count = 0; uint32_t command_label_count = 0; LocalVector command_list_nodes; @@ -757,13 +845,16 @@ class RenderingDeviceGraph { DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size); ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size); void _check_discardable_attachment_dependency(ResourceTracker *p_resource_tracker, int32_t p_previous_command_index, int32_t p_command_index); + RaytracingListInstruction *_allocate_raytracing_list_instruction(uint32_t p_instruction_size); void _add_command_to_graph(ResourceTracker 
**p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command); void _add_texture_barrier_to_command(RDD::TextureID p_texture_id, BitField p_src_access, BitField p_dst_access, ResourceUsage p_prev_usage, ResourceUsage p_next_usage, RDD::TextureSubresourceRange p_subresources, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count); #if USE_BUFFER_BARRIERS void _add_buffer_barrier_to_command(RDD::BufferID p_buffer_id, BitField p_src_access, BitField p_dst_access, int32_t &r_barrier_index, int32_t &r_barrier_count); #endif + void _add_acceleration_structure_barrier_to_command(RDD::AccelerationStructureID p_acceleration_structure_id, BitField p_src_access, BitField p_dst_access, LocalVector &r_barrier_vector, int32_t &r_barrier_index, int32_t &r_barrier_count); void _run_compute_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _get_draw_list_render_pass_and_framebuffer(const RecordedDrawListCommand *p_draw_list_command, RDD::RenderPassID &r_render_pass, RDD::FramebufferID &r_framebuffer); + void _run_raytracing_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _add_draw_list_begin(FramebufferCache *p_framebuffer_cache, RDD::RenderPassID p_render_pass, RDD::FramebufferID p_framebuffer, Rect2i p_region, VectorView p_attachment_operations, VectorView p_attachment_clear_values, BitField p_stages, uint32_t p_breadcrumb, bool p_split_cmd_buffer); void _run_secondary_command_buffer_task(const SecondaryCommandBuffer *p_secondary); @@ -775,6 +866,7 @@ class RenderingDeviceGraph { void _print_render_commands(const RecordedCommandSort *p_sorted_commands, uint32_t p_sorted_commands_count); void 
_print_draw_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); void _print_compute_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); + void _print_raytracing_list(const uint8_t *p_instruction_data, uint32_t p_instruction_data_size); public: RenderingDeviceGraph(); @@ -782,11 +874,21 @@ class RenderingDeviceGraph { void initialize(RDD *p_driver, RenderingContextDriver::Device p_device, RenderPassCreationFunction p_render_pass_creation_function, uint32_t p_frame_count, RDD::CommandQueueFamilyID p_secondary_command_queue_family, uint32_t p_secondary_command_buffers_per_frame); void finalize(); void begin(); + void add_acceleration_structure_build(RDD::AccelerationStructureID p_acceleration_structure, RDD::BufferID p_scratch_buffer, ResourceTracker *p_dst_tracker, VectorView p_src_trackers); void add_buffer_clear(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, uint32_t p_offset, uint32_t p_size); void add_buffer_copy(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, RDD::BufferCopyRegion p_region); void add_buffer_get_data(RDD::BufferID p_src, ResourceTracker *p_src_tracker, RDD::BufferID p_dst, RDD::BufferCopyRegion p_region); void add_buffer_update(RDD::BufferID p_dst, ResourceTracker *p_dst_tracker, VectorView p_buffer_copies); void add_driver_callback(RDD::DriverCallback p_callback, void *p_userdata, VectorView p_trackers, VectorView p_usages); + void add_raytracing_list_begin(); + void add_raytracing_list_bind_pipeline(RDD::RaytracingPipelineID p_pipeline); + void add_raytracing_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); + void add_raytracing_list_set_push_constant(RDD::ShaderID p_shader, const void *p_data, uint32_t p_data_size); + void add_raytracing_list_trace_rays(uint32_t p_width, uint32_t p_height); + void add_raytracing_list_uniform_set_prepare_for_use(RDD::ShaderID p_shader, RDD::UniformSetID 
p_uniform_set, uint32_t set_index); + void add_raytracing_list_usage(ResourceTracker *p_tracker, ResourceUsage p_usage); + void add_raytracing_list_usages(VectorView p_trackers, VectorView p_usages); + void add_raytracing_list_end(); void add_compute_list_begin(RDD::BreadcrumbMarker p_phase = RDD::BreadcrumbMarker::NONE, uint32_t p_breadcrumb_data = 0); void add_compute_list_bind_pipeline(RDD::PipelineID p_pipeline); void add_compute_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); diff --git a/servers/rendering/rendering_shader_container.cpp b/servers/rendering/rendering_shader_container.cpp index dab68d7b7143..a2c184f557ee 100644 --- a/servers/rendering/rendering_shader_container.cpp +++ b/servers/rendering/rendering_shader_container.cpp @@ -242,12 +242,38 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< LocalVector &r_refl = r_shader.shader_stages; r_refl.resize(spirv_size); + bool pipeline_type_detected = false; for (uint32_t i = 0; i < spirv_size; i++) { RDC::ShaderStage stage = p_spirv[i].shader_stage; RDC::ShaderStage stage_flag = (RDC::ShaderStage)(1 << stage); r_refl[i].shader_stage = stage; r_refl[i]._spirv_data = p_spirv[i].spirv; + if (!pipeline_type_detected) { + switch (stage) { + case RDC::SHADER_STAGE_VERTEX: + case RDC::SHADER_STAGE_FRAGMENT: + case RDC::SHADER_STAGE_TESSELATION_CONTROL: + case RDC::SHADER_STAGE_TESSELATION_EVALUATION: + r_shader.pipeline_type = RDC::PIPELINE_TYPE_RASTERIZATION; + break; + case RDC::SHADER_STAGE_COMPUTE: + r_shader.pipeline_type = RDC::PIPELINE_TYPE_COMPUTE; + break; + case RDC::SHADER_STAGE_RAYGEN: + case RDC::SHADER_STAGE_ANY_HIT: + case RDC::SHADER_STAGE_CLOSEST_HIT: + case RDC::SHADER_STAGE_MISS: + case RDC::SHADER_STAGE_INTERSECTION: + r_shader.pipeline_type = RDC::PIPELINE_TYPE_RAYTRACING; + break; + default: + DEV_ASSERT(false && "Unknown shader stage."); + } + + pipeline_type_detected = true; + } + const Vector 
&dynamic_buffers = p_spirv[i].dynamic_buffers; if (stage == RDC::SHADER_STAGE_COMPUTE) { @@ -255,7 +281,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< "Compute shaders can only receive one stage, dedicated to compute."); } ERR_FAIL_COND_V_MSG(reflection.stages_bits.has_flag(stage_flag), FAILED, - "Stage " + String(RDC::SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); + "Stage " + String(RDC::SHADER_STAGE_NAMES[stage]) + " submitted more than once."); reflection.stages_bits.set_flag(stage_flag); { @@ -370,8 +396,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span< is_image = true; } break; case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { - ERR_PRINT("Acceleration structure not supported."); - continue; + uniform.type = RDC::UNIFORM_TYPE_ACCELERATION_STRUCTURE; } break; } @@ -611,7 +636,7 @@ void RenderingShaderContainer::set_from_shader_reflection(const ReflectShader &p reflection_data.vertex_input_mask = p_reflection.vertex_input_mask; reflection_data.fragment_output_mask = p_reflection.fragment_output_mask; reflection_data.specialization_constants_count = p_reflection.specialization_constants.size(); - reflection_data.is_compute = p_reflection.is_compute(); + reflection_data.pipeline_type = p_reflection.pipeline_type; reflection_data.has_multiview = p_reflection.has_multiview; reflection_data.has_dynamic_buffers = p_reflection.has_dynamic_buffers; reflection_data.compute_local_size[0] = p_reflection.compute_local_size[0]; @@ -668,7 +693,7 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re shader_refl.push_constant_stages = reflection_data.push_constant_stages_mask; shader_refl.vertex_input_mask = reflection_data.vertex_input_mask; shader_refl.fragment_output_mask = reflection_data.fragment_output_mask; - shader_refl.is_compute = reflection_data.is_compute; + shader_refl.pipeline_type = reflection_data.pipeline_type; 
shader_refl.has_multiview = reflection_data.has_multiview; shader_refl.has_dynamic_buffers = reflection_data.has_dynamic_buffers; shader_refl.compute_local_size[0] = reflection_data.compute_local_size[0]; diff --git a/servers/rendering/rendering_shader_container.h b/servers/rendering/rendering_shader_container.h index 364294d26935..45151b0d50ea 100644 --- a/servers/rendering/rendering_shader_container.h +++ b/servers/rendering/rendering_shader_container.h @@ -59,7 +59,7 @@ class RenderingShaderContainer : public RefCounted { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; uint32_t specialization_constants_count = 0; - uint32_t is_compute = 0; + RDC::PipelineType pipeline_type = RDC::PIPELINE_TYPE_RASTERIZATION; uint32_t has_multiview = 0; uint32_t has_dynamic_buffers = 0; uint32_t compute_local_size[3] = {}; @@ -232,6 +232,7 @@ class RenderingShaderContainer : public RefCounted { uint32_t push_constant_size = 0; bool has_multiview = false; bool has_dynamic_buffers = false; + RDC::PipelineType pipeline_type = RDC::PIPELINE_TYPE_RASTERIZATION; LocalVector shader_stages; LocalVector uniform_sets; diff --git a/servers/rendering/shader_compiler.h b/servers/rendering/shader_compiler.h index 433f8fa28f0f..f01c9d102beb 100644 --- a/servers/rendering/shader_compiler.h +++ b/servers/rendering/shader_compiler.h @@ -40,6 +40,11 @@ class ShaderCompiler { STAGE_VERTEX, STAGE_FRAGMENT, STAGE_COMPUTE, + STAGE_RAYGEN, + STAGE_ANY_HIT, + STAGE_CLOSEST_HIT, + STAGE_MISS, + STAGE_INTERSECTION, STAGE_MAX };