diff --git a/antora/modules/ROOT/nav.adoc b/antora/modules/ROOT/nav.adoc index cfe1df8e5..74164bdcd 100644 --- a/antora/modules/ROOT/nav.adoc +++ b/antora/modules/ROOT/nav.adoc @@ -1,6 +1,6 @@ //// -- Copyright (c) 2023-2025, Holochip Inc -- Copyright (c) 2023-2025, Sascha Willems +- Copyright (c) 2023-2026, Holochip Inc +- Copyright (c) 2023-2026, Sascha Willems - Copyright (c) 2025, Arm Limited and Contributors - - SPDX-License-Identifier: Apache-2.0 @@ -84,6 +84,7 @@ *** xref:samples/extensions/hpp_push_descriptors/README.adoc[Push descriptors (Vulkan-Hpp)] ** xref:samples/extensions/ray_tracing_basic/README.adoc[Raytracing basic] ** xref:samples/extensions/ray_tracing_extended/README.adoc[Raytracing extended] +** xref:samples/extensions/ray_tracing_invocation_reorder/README.adoc[Ray tracing invocation reorder (SER)] ** xref:samples/extensions/ray_queries/README.adoc[Ray queries] ** xref:samples/extensions/ray_tracing_reflection/README.adoc[Ray tracing reflection] ** xref:samples/extensions/ray_tracing_position_fetch/README.adoc[Ray tracing position fetch] diff --git a/framework/vulkan_type_mapping.h b/framework/vulkan_type_mapping.h index c9c75fc06..b86b37a83 100644 --- a/framework/vulkan_type_mapping.h +++ b/framework/vulkan_type_mapping.h @@ -1,5 +1,5 @@ /* Copyright (c) 2025, Arm Limited and Contributors - * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved. 
* * SPDX-License-Identifier: Apache-2.0 * @@ -243,6 +243,20 @@ struct HPPType using Type = vk::PhysicalDeviceRayTracingPipelineFeaturesKHR; }; +#ifdef VK_EXT_ray_tracing_invocation_reorder +template <> +struct HPPType +{ + using Type = vk::PhysicalDeviceRayTracingInvocationReorderFeaturesEXT; +}; +#endif + +template <> +struct HPPType +{ + using Type = vk::PhysicalDeviceRayTracingInvocationReorderFeaturesNV; +}; + template <> struct HPPType { diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 0ba7aa663..786ddd974 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2025, Arm Limited and Contributors +# Copyright (c) 2019-2026, Arm Limited and Contributors # # SPDX-License-Identifier: Apache-2.0 # @@ -75,6 +75,7 @@ set(ORDER_LIST "ray_tracing_basic" "ray_tracing_extended" "ray_tracing_reflection" + "ray_tracing_invocation_reorder" "timeline_semaphore" "shader_object" "shader_debugprintf" diff --git a/samples/extensions/README.adoc b/samples/extensions/README.adoc index cda58c81d..a81c60ef3 100644 --- a/samples/extensions/README.adoc +++ b/samples/extensions/README.adoc @@ -1,6 +1,6 @@ //// - Copyright (c) 2025, Arm Limited and Contributors -- Copyright (c) 2021-2025, The Khronos Group +- Copyright (c) 2021-2026, The Khronos Group - - SPDX-License-Identifier: Apache-2.0 - @@ -312,3 +312,9 @@ Demonstrate how to build data graph pipelines and execute neural networks: * xref:./{extension_samplespath}tensor_and_data_graph/simple_tensor_and_data_graph/README.adoc[simple_tensor_and_data_graph] - Explains how to set up and execute a simple neural network using a data graph pipeline. 
+ +=== xref:./{extension_samplespath}ray_tracing_invocation_reorder/README.adoc[Ray Tracing Invocation Reorder] + +*Extensions:* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[`VK_EXT_ray_tracing_invocation_reorder`] + +Demonstrate how to reduce divergence in ray tracing pipelines by reordering shader invocations (Shader Execution Reordering). \ No newline at end of file diff --git a/samples/extensions/ray_tracing_invocation_reorder/CMakeLists.txt b/samples/extensions/ray_tracing_invocation_reorder/CMakeLists.txt new file mode 100644 index 000000000..651af2bab --- /dev/null +++ b/samples/extensions/ray_tracing_invocation_reorder/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright (c) 2025-2026, Holochip Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 the "License"; +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME) +get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH) +get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME) + +add_sample_with_tags( + ID ${FOLDER_NAME} + CATEGORY ${CATEGORY_NAME} + AUTHOR "Holochip Inc." + NAME "Ray tracing invocation reorder" + DESCRIPTION "Demonstrates Shader Execution Reordering (SER) using VK_EXT_ray_tracing_invocation_reorder to reduce divergence" + SHADER_FILES_GLSL + # Note: We do not compile GLSL here because glslc may lack GL_EXT_shader_invocation_reorder (current public SDK doesn't support it). 
+# "ray_tracing_invocation_reorder/glsl/raygen.rgen" +# "ray_tracing_invocation_reorder/glsl//miss.rmiss" +# "ray_tracing_invocation_reorder/glsl//closesthit_flame.rchit" +# "ray_tracing_invocation_reorder/glsl//closesthit_normal.rchit" +# "ray_tracing_invocation_reorder/glsl//closesthit_refraction.rchit" + SHADER_FILES_SLANG + "ray_tracing_invocation_reorder/slang/raygen.rgen.slang" + "ray_tracing_invocation_reorder/slang/miss.rmiss.slang" + "ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.slang" + "ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.slang" + "ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.slang" + ) diff --git a/samples/extensions/ray_tracing_invocation_reorder/README.adoc b/samples/extensions/ray_tracing_invocation_reorder/README.adoc new file mode 100644 index 000000000..eaff12406 --- /dev/null +++ b/samples/extensions/ray_tracing_invocation_reorder/README.adoc @@ -0,0 +1,290 @@ +//// +- Copyright (c) 2025-2026, Holochip Inc. +- +- SPDX-License-Identifier: Apache-2.0 +- +- Licensed under the Apache License, Version 2.0 the "License"; +- you may not use this file except in compliance with the License. +- You may obtain a copy of the License at +- +- http://www.apache.org/licenses/LICENSE-2.0 +- +- Unless required by applicable law or agreed to in writing, software +- distributed under the License is distributed on an "AS IS" BASIS, +- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +- See the License for the specific language governing permissions and +- limitations under the License. +- +//// + += Shader Execution Reordering (SER) for Ray Tracing + +ifdef::site-gen-antora[] +TIP: The source for this sample can be found in the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/ray_tracing_invocation_reorder[Khronos Vulkan samples github repository]. 
+endif::[] + +*Extensions*: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[`VK_EXT_ray_tracing_invocation_reorder`] + +*GLSL Extensions*: https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GLSL_EXT_shader_invocation_reorder.txt[`GL_EXT_shader_invocation_reorder`] + +== Overview + +This sample demonstrates *Shader Execution Reordering (SER)*, a powerful optimization technique for ray tracing that reduces performance issues caused by divergence. SER allows you to reorganize shader invocations across the GPU to group similar work together, significantly improving coherency and performance. + +The sample shows how to use the `VK_EXT_ray_tracing_invocation_reorder` extension with hit objects and the `reorderThreadEXT()` / `ReorderThread()` functions to achieve performance improvements of 20–50% or more in ray tracing workloads. + +IMPORTANT: glslc in current Vulkan SDKs does not yet support `GL_EXT_shader_invocation_reorder`. For this reason, this sample is authored and built with **Slang** by default. The GLSL source files are provided for reference only and are not compiled by the build system. + +TIP: Prefer the provided **Slang** shaders for this sample. They compile to SPIR-V using the Slang compiler and expose SER via `HitObject` and `ReorderThread()` intrinsics. (GLSL usage is shown for completeness, but may not compile with glslc until support lands.) + +== The Divergence Problem + +Ray tracing faces two major performance challenges: + +=== Control Flow Divergence + +GPUs execute shader code in parallel on groups of invocations (subgroups, typically 32 or 64 threads). When invocations in the same subgroup take different code paths—such as invoking different shaders or executing different branches—the GPU must execute each path in turn, with the invocations that did not take the current path sitting idle until it finishes. 
+ +In ray tracing, this commonly occurs when: + +* Adjacent rays hit different objects and invoke different closest-hit shaders +* Some rays miss while others hit geometry +* Rays terminate at different bounce depths + +=== Data Divergence + +When rays become incoherent, they access scattered memory locations for geometry data, textures, and acceleration structures. This leads to: + +* Poor cache utilization +* Increased memory bandwidth requirements +* Stalls waiting for memory subsystems + +== How Shader Execution Reordering Helps + +SER addresses these issues by introducing *hit objects* that separate ray traversal from shader invocation, allowing the GPU to pause execution and reorder invocations: + +[source,glsl] +---- +// Traditional approach: traverse and invoke shaders in one call +traceRayEXT(topLevelAS, rayFlags, cullMask, sbtOffset, sbtStride, + missIndex, rayOrigin, rayTMin, rayDirection, rayTMax, payloadIndex); + +// SER approach: separate traversal from shader invocation +hitObjectEXT hitObj; +hitObjectRecordEmptyEXT(hitObj); + +// Step 1: Traverse acceleration structure +hitObjectTraceRayEXT(hitObj, topLevelAS, rayFlags, cullMask, + sbtOffset, sbtStride, missIndex, + rayOrigin, rayTMin, rayDirection, rayTMax, payloadIndex); + +// Step 2: Reorder invocations for better coherency +reorderThreadEXT(hitObj); + +// Step 3: Invoke the miss or closest-hit shader +hitObjectExecuteShaderEXT(hitObj, payloadIndex); +---- + +The same concepts apply in **Slang** with HLSL-style syntax: + +[source,slang] +---- +// Traditional approach: traverse and invoke shaders in one call +TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 0, 0, 0, ray, payload); + +// SER approach: separate traversal from shader invocation +RayDesc ray; +ray.Origin = origin.xyz; +ray.Direction = direction.xyz; +ray.TMin = tmin; +ray.TMax = tmax; + +// Step 1: Trace ray and store hit information in hit object +HitObject hitObj = HitObject::TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, + 0, 0, 0, ray, 
payload); + +// Step 2: Reorder invocations for better coherency +ReorderThread(hitObj); + +// Step 3: Execute the miss or closest-hit shader +HitObject::Invoke(topLevelAS, hitObj, payload); +---- + +By calling `reorderThreadEXT()` (GLSL) or `ReorderThread()` (Slang), the GPU can: + +* Group invocations that will execute the same shader +* Organize invocations accessing similar data +* Reduce overall divergence and improve cache efficiency + +== Using Coherence Hints + +For even better performance, you can provide hints to guide the reordering: + +[source,glsl] +---- +// Reorder with a coherence hint +uint hint = 0; +if (hitObjectIsHitEXT(hitObj)) +{ + hint = hitObjectGetInstanceIdEXT(hitObj); +} +reorderThreadEXT(hitObj, hint, 8); // Use 8 bits for the hint +---- + +In Slang, the equivalent looks like this: + +[source,slang] +---- +uint hint = 0; +if (hitObj.IsHit()) +{ + hint = hitObj.GetInstanceIndex(); +} +ReorderThread(hitObj, hint, 8); +---- + +The GPU sorts invocations by: + +1. *Shader ID* (highest priority - which shader will execute) +2. *Your hint* (middle priority - custom application-specific data) +3. *Implementation-specific data* (lowest priority) + +Good coherence hints include: + +* Material IDs or flags that affect control flow +* Texture binding indices for similar data access +* Early-exit conditions (e.g., path length, Russian Roulette) + +== Hit Objects Without Reordering + +Even if you don't need reordering, hit objects provide valuable functionality: + +* *Shadow/AO rays*: Skip shader invocation entirely with `hitObjectIsHitEXT()` or `hitObjectIsMissEXT()` +* *Flexible payloads*: Use different payload types for traversal vs. 
shader invocation +* *Direct hit access*: Query hit information (positions, normals, matrices) at the ray generation level + +== Best Practices + +=== When to Use SER + +SER provides the biggest benefits when you have: + +* *Path tracing* with multiple bounces and material diversity +* *Multiple closest-hit shaders* representing different materials +* *Secondary, scattered rays* (e.g., rough reflections) +* *Stochastic effects* creating natural divergence + +SER may not help as much with: + +* Highly coherent primary rays +* Simple shaders with minimal divergence +* Single übershaders with minimal branching + +=== Minimizing Live State + +When `reorderThreadEXT()` is called, the GPU must save and restore the invocation's local variables (live state). To maximize performance: + +* Avoid keeping variables live across the `reorderThreadEXT()` call +* Use smaller data types (FP16 instead of FP32 where appropriate) +* Pack flags and enums into bit fields +* Audit your ray payloads to remove unnecessary fields + +=== Device Support + +The extension has backwards-compatibility built in: + +* On devices with hardware SER support, `reorderThreadEXT()` actively reorders invocations +* On devices that expose the extension but cannot reorder in hardware, `reorderThreadEXT()` becomes a no-op, but hit objects still work +* Query `VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT` to check whether the implementation actually reorders: + +[source,cpp] +---- +VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT serProperties{}; +serProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_EXT; + +VkPhysicalDeviceProperties2 deviceProperties{}; +deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; +deviceProperties.pNext = &serProperties; + +vkGetPhysicalDeviceProperties2(physicalDevice, &deviceProperties); + +bool canReorder = (serProperties.rayTracingInvocationReorderReorderingHint == + VK_RAY_TRACING_INVOCATION_REORDER_MODE_REORDER_EXT); +---- + +== This Sample + +This sample demonstrates SER with 
an interactive comparison: + +* *Three material types* that create control flow divergence: +** Diffuse ("normal") textured surfaces +** Refraction (glass or smoke) surfaces +** Flame/emissive particles +* *Toggle SER on/off* to see the performance difference +* *Coherence hints* based on instance ID (can be toggled) +* *Real-time UI* showing whether the device supports reordering + +The scene is intentionally designed to produce heavy divergence, so the benefit of enabling SER is easy to see. + +=== Key Features + +* Enable/disable SER dynamically via UI +* Toggle coherence hints to see their impact +* Compare traditional `traceRayEXT()`/`TraceRay()` vs. hit objects + `reorderThreadEXT()`/`ReorderThread()` +* Device capability detection and display + +== Enabling the Extension + +To use SER in your application: + +[source,cpp] +---- +// Enable the extension +add_device_extension(VK_EXT_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME); + +// Request the feature +REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayTracingInvocationReorderFeaturesEXT, + rayTracingInvocationReorder); +---- + +In Slang shaders: + +[source,slang] +---- +// Use HitObject + ReorderThread to enable SER +HitObject hitObj = HitObject::TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 0, 0, 0, ray, payload); +// Optionally provide a coherence hint (e.g., instance index) +uint hint = hitObj.IsHit() ? hitObj.GetInstanceIndex() : 0; +ReorderThread(hitObj, hint, 8); +HitObject::Invoke(topLevelAS, hitObj, payload); +---- + +If you use GLSL, enable the extension explicitly in your shader: + +[source,glsl] +---- +#extension GL_EXT_shader_invocation_reorder : enable +---- + +NOTE: glslc in current public SDKs may not compile GLSL shaders using this extension yet; prefer Slang for now. 
+ +== Performance Expectations + +Real-world applications have seen: + +* *11-24%* improvement in path tracing (with live state optimization) +* *40-50%* in synthetic benchmarks with high divergence +* *30-40%* when combined with other optimizations (e.g., Opacity Micromaps) + +The actual gain depends on: + +* Scene complexity and material diversity +* Amount of control flow and data divergence +* Quality of coherence hints +* Live state size + +== Resources + +* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[VK_EXT_ray_tracing_invocation_reorder specification] +* https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GLSL_EXT_shader_invocation_reorder.txt[GL_EXT_shader_invocation_reorder specification] +* https://github.com/microsoft/DirectX-Specs/blob/master/d3d/Raytracing.md#shader-execution-reordering[DirectX Shader Execution Reordering documentation] diff --git a/samples/extensions/ray_tracing_invocation_reorder/ray_tracing_invocation_reorder.cpp b/samples/extensions/ray_tracing_invocation_reorder/ray_tracing_invocation_reorder.cpp new file mode 100644 index 000000000..1bfd6b9ab --- /dev/null +++ b/samples/extensions/ray_tracing_invocation_reorder/ray_tracing_invocation_reorder.cpp @@ -0,0 +1,1492 @@ +/* Copyright (c) 2021-2026 Holochip Corporation + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Shader Execution Reordering (SER) example using VK_EXT_ray_tracing_invocation_reorder (or VK_NV_ray_tracing_invocation_reorder as a fallback) on top of VK_KHR_ray_tracing_pipeline and VK_KHR_acceleration_structure + */ + +#include "ray_tracing_invocation_reorder.h" +#include "gltf_loader.h" +#include "scene_graph/components/camera.h" +#include "scene_graph/components/material.h" +#include "scene_graph/components/mesh.h" +#include "scene_graph/components/pbr_material.h" +#include +#define ASSERT_LOG(cond, msg) \ + { \ + if (!(cond)) \ + { \ + LOGE(msg); \ + throw std::runtime_error(msg); \ + } \ + } + +// contains information about the vertex + +struct RaytracingInvocationReorder::NewVertex +{ + glm::vec3 pos; + glm::vec3 normal; + glm::vec2 tex_coord; +}; + +struct RaytracingInvocationReorder::Model +{ + std::vector vertices; + std::vector> triangles; + VkTransformMatrixKHR default_transform; + uint32_t texture_index; + uint32_t object_type; + Model() : + default_transform({1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f}), + texture_index(0), + object_type(0) + {} +}; + +RaytracingInvocationReorder::RaytracingInvocationReorder() : + index_count(0), pipeline(VK_NULL_HANDLE), pipeline_layout(VK_NULL_HANDLE), descriptor_set(VK_NULL_HANDLE), descriptor_set_layout(VK_NULL_HANDLE) +{ + title = "Ray tracing with extended features"; + + // SPIRV 1.4 requires Vulkan 1.1 + set_api_version(VK_API_VERSION_1_1); + + // Ray tracing related extensions required by this sample + add_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + add_device_extension(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME); + + // Shader Execution Reordering extension - try EXT first, fallback to NV + // Note: We add the extension optimistically here. 
The actual availability check + // and feature request happens in request_gpu_features() +#ifdef VK_EXT_ray_tracing_invocation_reorder + add_device_extension(VK_EXT_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME, true); // optional +#endif + add_device_extension(VK_NV_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME, true); // optional + + // Required by VK_KHR_acceleration_structure + add_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME); + add_device_extension(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME); + add_device_extension(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME); + + // Required for VK_KHR_ray_tracing_pipeline + add_device_extension(VK_KHR_SPIRV_1_4_EXTENSION_NAME); + + // Required by VK_KHR_spirv_1_4 + add_device_extension(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); +} + +RaytracingInvocationReorder::~RaytracingInvocationReorder() +{ + if (has_device()) + { + // Free raytracing command buffers + if (!raytracing_command_buffers.empty()) + { + vkFreeCommandBuffers(get_device().get_handle(), get_device().get_command_pool().get_handle(), + static_cast(raytracing_command_buffers.size()), raytracing_command_buffers.data()); + raytracing_command_buffers.clear(); + } + + flame_texture.image.reset(); + vkDestroySampler(get_device().get_handle(), flame_texture.sampler, nullptr); + vkDestroyPipeline(get_device().get_handle(), pipeline, nullptr); + vkDestroyPipelineLayout(get_device().get_handle(), pipeline_layout, nullptr); + vkDestroyDescriptorSetLayout(get_device().get_handle(), descriptor_set_layout, nullptr); + vkDestroyImageView(get_device().get_handle(), storage_image.view, nullptr); + vkDestroyImage(get_device().get_handle(), storage_image.image, nullptr); + vkFreeMemory(get_device().get_handle(), storage_image.memory, nullptr); +#ifndef USE_FRAMEWORK_ACCELERATION_STRUCTURE + delete_acceleration_structure(top_level_acceleration_structure); +#endif + raytracing_scene.reset(); + vertex_buffer.reset(); + dynamic_vertex_buffer.reset(); + 
index_buffer.reset(); + dynamic_index_buffer.reset(); + ubo.reset(); + } +} + +void RaytracingInvocationReorder::request_gpu_features(vkb::core::PhysicalDeviceC &gpu) +{ + // Enable extension features required by this sample + // These are passed to device creation via a pNext structure chain + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceBufferDeviceAddressFeatures, bufferDeviceAddress); + + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayTracingPipelineFeaturesKHR, rayTracingPipeline); + + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceAccelerationStructureFeaturesKHR, accelerationStructure); + + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceDescriptorIndexingFeaturesEXT, shaderSampledImageArrayNonUniformIndexing); + + // We read/write a storage image without specifying a format in the shader (untyped image) + // so we must enable these core device features. + gpu.get_mutable_requested_features().shaderStorageImageReadWithoutFormat = VK_TRUE; + gpu.get_mutable_requested_features().shaderStorageImageWriteWithoutFormat = VK_TRUE; + + // Enable Shader Execution Reordering feature - try EXT first, fallback to NV + // Check which extension is available +#ifdef VK_EXT_ray_tracing_invocation_reorder + if (gpu.is_extension_supported(VK_EXT_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME)) + { + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayTracingInvocationReorderFeaturesEXT, rayTracingInvocationReorder); + using_nv_extension = false; + LOGI("Using VK_EXT_ray_tracing_invocation_reorder"); + } + else +#endif + if (gpu.is_extension_supported(VK_NV_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME)) + { + REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV, rayTracingInvocationReorder); + using_nv_extension = true; + LOGI("Using VK_NV_ray_tracing_invocation_reorder"); + } + else + { + throw std::runtime_error("Ray tracing invocation reorder extension is not supported"); + } + + if (gpu.get_features().samplerAnisotropy) + { + 
gpu.get_mutable_requested_features().samplerAnisotropy = true; + } +} + +/* + Set up a storage image that the ray generation shader will be writing to +*/ +void RaytracingInvocationReorder::create_storage_image() +{ + storage_image.width = width; + storage_image.height = height; + + VkImageCreateInfo image = vkb::initializers::image_create_info(); + image.imageType = VK_IMAGE_TYPE_2D; + image.format = VK_FORMAT_B8G8R8A8_UNORM; + image.extent.width = storage_image.width; + image.extent.height = storage_image.height; + image.extent.depth = 1; + image.mipLevels = 1; + image.arrayLayers = 1; + image.samples = VK_SAMPLE_COUNT_1_BIT; + image.tiling = VK_IMAGE_TILING_OPTIMAL; + image.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + image.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + VK_CHECK(vkCreateImage(get_device().get_handle(), &image, nullptr, &storage_image.image)); + + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(get_device().get_handle(), storage_image.image, &memory_requirements); + VkMemoryAllocateInfo memory_allocate_info = vkb::initializers::memory_allocate_info(); + memory_allocate_info.allocationSize = memory_requirements.size; + memory_allocate_info.memoryTypeIndex = get_device().get_gpu().get_memory_type(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + VK_CHECK(vkAllocateMemory(get_device().get_handle(), &memory_allocate_info, nullptr, &storage_image.memory)); + VK_CHECK(vkBindImageMemory(get_device().get_handle(), storage_image.image, storage_image.memory, 0)); + + VkImageViewCreateInfo color_image_view = vkb::initializers::image_view_create_info(); + color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D; + color_image_view.format = VK_FORMAT_B8G8R8A8_UNORM; + color_image_view.subresourceRange = {}; + color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + color_image_view.subresourceRange.baseMipLevel = 0; + color_image_view.subresourceRange.levelCount = 1; + 
color_image_view.subresourceRange.baseArrayLayer = 0; + color_image_view.subresourceRange.layerCount = 1; + color_image_view.image = storage_image.image; + VK_CHECK(vkCreateImageView(get_device().get_handle(), &color_image_view, nullptr, &storage_image.view)); + + VkCommandBuffer command_buffer = get_device().create_command_buffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + vkb::image_layout_transition(command_buffer, + storage_image.image, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + {}, + {}, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_GENERAL, + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}); + get_device().flush_command_buffer(command_buffer, queue); +} + +/* + Gets the device address from a buffer that's needed in many places during the ray tracing setup +*/ +uint64_t RaytracingInvocationReorder::get_buffer_device_address(VkBuffer buffer) +{ + VkBufferDeviceAddressInfoKHR buffer_device_address_info{}; + buffer_device_address_info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + buffer_device_address_info.buffer = buffer; + return vkGetBufferDeviceAddressKHR(get_device().get_handle(), &buffer_device_address_info); +} + +void RaytracingInvocationReorder::create_flame_model() +{ + flame_texture = load_texture("textures/generated_flame.ktx", vkb::sg::Image::Color); + std::vector pts_ = {{0, 0, 0}, + {1, 0, 0}, + {1, 1, 0}, + {0, 1, 0}}; + std::vector indices_ = {{0, 1, 2}, + {0, 2, 3}}; + + std::vector vertices; + for (auto &pt : pts_) + { + NewVertex vertex; + vertex.pos = pt - glm::vec3(0.5f, 0.5f, 0.f); // center the point + vertex.normal = {0, 0, 1}; + vertex.tex_coord = {static_cast(pt.x), 1.f - static_cast(pt.y)}; + vertices.push_back(vertex); + } + + Model model; + model.vertices = vertices; + model.triangles = indices_; + model.object_type = OBJECT_FLAME; + model.texture_index = static_cast(raytracing_scene->imageInfos.size()); + VkDescriptorImageInfo image_info; + image_info.imageLayout = 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + image_info.imageView = flame_texture.image->get_vk_image_view().get_handle(); + image_info.sampler = flame_texture.sampler; + + raytracing_scene->models.emplace_back(std::move(model)); + raytracing_scene->imageInfos.push_back(image_info); + + flame_generator = FlameParticleGenerator(glm::vec3{-0.15, -1.5, -2.3}, glm::vec3{0, -1, 0}, 0.5f, 512); +} + +void RaytracingInvocationReorder::create_static_object_buffers() +{ + assert(!!raytracing_scene); + auto &models = raytracing_scene->models; + auto &model_buffers = raytracing_scene->model_buffers; + model_buffers.resize(0); + + std::vector vertex_buffer_offsets(models.size()), index_buffer_offsets(models.size()); + uint32_t nTotalVertices = 0, nTotalTriangles = 0; + for (size_t i = 0; i < models.size(); ++i) + { + vertex_buffer_offsets[i] = nTotalVertices * sizeof(NewVertex); + nTotalVertices += models[i].vertices.size(); + + index_buffer_offsets[i] = nTotalTriangles * sizeof(Triangle); + nTotalTriangles += models[i].triangles.size(); + } + + // uint32_t firstVertex = 0, primitiveOffset = 0; + auto vertex_buffer_size = nTotalVertices * sizeof(NewVertex); + auto index_buffer_size = nTotalTriangles * sizeof(Triangle); + + // Create a staging buffer. (If staging buffer use is disabled, then this will be the final buffer) + std::unique_ptr staging_vertex_buffer = nullptr, staging_index_buffer = nullptr; + static constexpr VkBufferUsageFlags buffer_usage_flags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + const VkBufferUsageFlags staging_flags = scene_options.use_vertex_staging_buffer ? 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT : buffer_usage_flags; + staging_vertex_buffer = std::make_unique(get_device(), vertex_buffer_size, staging_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); + staging_index_buffer = std::make_unique(get_device(), index_buffer_size, staging_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); + + // Copy over the data for each of the models + for (size_t i = 0; i < models.size(); ++i) + { + auto &model = models[i]; + staging_vertex_buffer->update(model.vertices.data(), model.vertices.size() * sizeof(model.vertices[0]), vertex_buffer_offsets[i]); + staging_index_buffer->update(model.triangles.data(), model.triangles.size() * sizeof(model.triangles[0]), index_buffer_offsets[i]); + } + + // now transfer over to the end buffer + if (scene_options.use_vertex_staging_buffer) + { + auto cmd = get_device().get_command_pool().request_command_buffer(); + cmd->begin(VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, VK_NULL_HANDLE); + auto copy = [this, &cmd](vkb::core::BufferC &staging_buffer) { + auto output_buffer = std::make_unique(get_device(), staging_buffer.get_size(), buffer_usage_flags | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VMA_MEMORY_USAGE_GPU_ONLY); + cmd->copy_buffer(staging_buffer, *output_buffer, staging_buffer.get_size()); + + vkb::BufferMemoryBarrier barrier; + barrier.src_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + barrier.dst_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + barrier.src_access_mask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + cmd->buffer_memory_barrier(*output_buffer, 0, VK_WHOLE_SIZE, barrier); + return output_buffer; + }; + vertex_buffer = copy(*staging_vertex_buffer); + index_buffer = copy(*staging_index_buffer); + + cmd->end(); + auto &queue = get_device().get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT, 0); + queue.submit(*cmd, get_device().get_fence_pool().request_fence()); + get_device().get_fence_pool().wait(); + } + else + { + vertex_buffer = 
std::move(staging_vertex_buffer); + index_buffer = std::move(staging_index_buffer); + } + + for (size_t i = 0; i < models.size(); ++i) + { + ModelBuffer buffer; + buffer.vertex_offset = vertex_buffer_offsets[i]; + buffer.index_offset = index_buffer_offsets[i]; + buffer.is_static = true; + buffer.default_transform = models[i].default_transform; + buffer.num_vertices = models[i].vertices.size(); + buffer.num_triangles = models[i].triangles.size(); + buffer.texture_index = models[i].texture_index; + buffer.object_type = models[i].object_type; + model_buffers.emplace_back(std::move(buffer)); + } +} + +/* + Create the bottom level acceleration structure that contains the scene's geometry (triangles) +*/ +void RaytracingInvocationReorder::create_bottom_level_acceleration_structure(bool is_update, bool print_time) +{ + assert(!!raytracing_scene); + /** + Though we use similar code to handle static and dynamic objects, several parts differ: + 1. Static / dynamic objects have different buffers (device-only vs host-visible) + 2. Dynamic objects use different flags (i.e. for fast rebuilds) + */ + + assert(!!vertex_buffer && !!index_buffer); + const uint64_t static_vertex_handle = get_buffer_device_address(vertex_buffer->get_handle()), + static_index_handle = get_buffer_device_address(index_buffer->get_handle()), + dynamic_vertex_handle = dynamic_vertex_buffer ? get_buffer_device_address(dynamic_vertex_buffer->get_handle()) : 0, + dynamic_index_handle = dynamic_index_buffer ? 
get_buffer_device_address(dynamic_index_buffer->get_handle()) : 0; + auto &model_buffers = raytracing_scene->model_buffers; + for (auto &model_buffer : model_buffers) + { + if (model_buffer.is_static && is_update) + { + continue; + } + const VkBufferUsageFlags buffer_usage_flags = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + + // Set up a single transformation matrix that can be used to transform the whole geometry for a single bottom level acceleration structure + VkTransformMatrixKHR transform_matrix = model_buffer.default_transform; + if (!model_buffer.transform_matrix_buffer || model_buffer.transform_matrix_buffer->get_size() != sizeof(transform_matrix)) + { + model_buffer.transform_matrix_buffer = std::make_unique(get_device(), sizeof(transform_matrix), buffer_usage_flags, VMA_MEMORY_USAGE_CPU_TO_GPU); + } + model_buffer.transform_matrix_buffer->update(&transform_matrix, sizeof(transform_matrix)); + + if (model_buffer.bottom_level_acceleration_structure == nullptr) + { + model_buffer.bottom_level_acceleration_structure = std::make_unique( + get_device(), VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR); + model_buffer.object_id = model_buffer.bottom_level_acceleration_structure->add_triangle_geometry( + model_buffer.is_static ? *vertex_buffer : *dynamic_vertex_buffer, + model_buffer.is_static ? *index_buffer : *dynamic_index_buffer, + *model_buffer.transform_matrix_buffer, + static_cast(model_buffer.num_triangles), + static_cast(model_buffer.num_vertices) - 1, + sizeof(NewVertex), + 0, + VK_FORMAT_R32G32B32_SFLOAT, + VK_INDEX_TYPE_UINT32, + VK_GEOMETRY_OPAQUE_BIT_KHR, + model_buffer.vertex_offset + (model_buffer.is_static ? static_vertex_handle : dynamic_vertex_handle), + model_buffer.index_offset + (model_buffer.is_static ? 
static_index_handle : dynamic_index_handle)); + } + else + { + model_buffer.bottom_level_acceleration_structure->update_triangle_geometry( + model_buffer.object_id, + dynamic_vertex_buffer, + dynamic_index_buffer, + model_buffer.transform_matrix_buffer, + static_cast(model_buffer.num_triangles), + static_cast(model_buffer.num_vertices) - 1, + sizeof(NewVertex), + 0, VK_FORMAT_R32G32B32_SFLOAT, VK_GEOMETRY_OPAQUE_BIT_KHR, + model_buffer.vertex_offset + (model_buffer.is_static ? static_vertex_handle : dynamic_vertex_handle), + model_buffer.index_offset + (model_buffer.is_static ? static_index_handle : dynamic_index_handle)); + } + model_buffer.bottom_level_acceleration_structure->build(queue, + model_buffer.is_static ? VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR, + is_update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR); + } +} + +VkTransformMatrixKHR RaytracingInvocationReorder::calculate_rotation(glm::vec3 pt, float scale, bool freeze_z) +{ + using namespace glm; + auto normal = normalize(pt + camera.position); + if (freeze_z) + { + normal = normalize(abs(dot(normal, vec3{0, 1, 0})) > 0.99f ? 
vec3{0, 0, 1} : vec3{normal.x, 0.f, normal.z}); + } + auto u = normalize(cross(normal, vec3(0, 1, 0))); + auto v = normalize(cross(normal, u)); + + // wait to multiply by scale until after calculating basis to prevent floating point problems + normal *= scale; + u *= scale; + v *= scale; + return { + u.x, v.x, normal.x, pt.x, + u.y, v.y, normal.y, pt.y, + u.z, v.z, normal.z, pt.z}; +} + +/* + Create the top level acceleration structure containing geometry instances of the bottom level acceleration structure(s) +*/ +void RaytracingInvocationReorder::create_top_level_acceleration_structure(bool print_time) +{ + /* + Often, good performance can be obtained when the TLAS uses PREFER_FAST_TRACE with full rebuilds. + */ + assert(!!raytracing_scene); + VkTransformMatrixKHR transform_matrix = { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f}; + + // This buffer is used to correlate the instance information with model information + // and is required because the number and type of instances is dynamic + std::vector model_instance_data; + + // Add the instances for the static scene, billboard texture, and refraction model + std::vector instances; + auto add_instance = [&](ModelBuffer &model_buffer, const VkTransformMatrixKHR &transform_matrix, uint32_t instance_index) { + VkAccelerationStructureInstanceKHR acceleration_structure_instance{}; + acceleration_structure_instance.transform = transform_matrix; + acceleration_structure_instance.instanceCustomIndex = instance_index; + acceleration_structure_instance.mask = 0xFF; + // Use object_type to select the appropriate hit group shader + // This creates shader divergence that SER can optimize by reordering threads + // Hit group 0 = OBJECT_NORMAL, Hit group 1 = OBJECT_REFRACTION, Hit group 2 = OBJECT_FLAME + acceleration_structure_instance.instanceShaderBindingTableRecordOffset = model_buffer.object_type; + acceleration_structure_instance.flags = 
VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + acceleration_structure_instance.accelerationStructureReference = model_buffer.bottom_level_acceleration_structure->get_device_address(); + instances.emplace_back(acceleration_structure_instance); + }; + + for (size_t i = 0; i < raytracing_scene->model_buffers.size(); ++i) + { + auto &model_buffer = raytracing_scene->model_buffers[i]; + + SceneInstanceData scene_instance{}; + scene_instance.vertex_index = static_cast(model_buffer.vertex_offset / sizeof(NewVertex)); + scene_instance.indices_index = static_cast(model_buffer.index_offset / sizeof(Triangle)); + scene_instance.object_type = model_buffer.object_type; + scene_instance.image_index = model_buffer.texture_index; + ASSERT_LOG(scene_instance.object_type == ObjectType::OBJECT_REFRACTION || scene_instance.image_index < raytracing_scene->imageInfos.size(), "Only the refraction model can be texture less.") + model_instance_data.emplace_back(scene_instance); + + // these objects have a single instance with the identity transform + switch (model_buffer.object_type) + { + case (ObjectType::OBJECT_NORMAL): + add_instance(model_buffer, transform_matrix, static_cast(i)); + break; + case (ObjectType::OBJECT_REFRACTION): + add_instance(model_buffer, calculate_rotation({-0.25, -2.5, -2.35}, 1.f, true), static_cast(i)); + break; + default: + // handle flame separately + break; + } + } + + { + // find the flame particle object, then add the particles as instances + auto &model_buffers = raytracing_scene->model_buffers; + auto iter = std::ranges::find_if(model_buffers, [](const ModelBuffer &model_buffer) { + return model_buffer.object_type == ObjectType::OBJECT_FLAME; + }); + ASSERT_LOG(iter != model_buffers.cend(), "Can't find flame object.") + auto &model_buffer = *iter; + uint32_t index = static_cast(std::distance(model_buffers.begin(), iter)); + for (auto &&particle : flame_generator.particles) + { + add_instance(model_buffer, 
calculate_rotation(particle.position, 0.25f, true), index); + } + } + + size_t data_to_model_size = model_instance_data.size() * sizeof(model_instance_data[0]); + if (!data_to_model_buffer || data_to_model_buffer->get_size() < data_to_model_size) + { + data_to_model_buffer = std::make_unique(get_device(), data_to_model_size, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + } + data_to_model_buffer->update(model_instance_data.data(), data_to_model_size, 0); + + const size_t instancesDataSize = sizeof(VkAccelerationStructureInstanceKHR) * instances.size(); + if (!instances_buffer || instances_buffer->get_size() != instancesDataSize) + { + instances_buffer = std::make_unique(get_device(), + instancesDataSize, + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + } + instances_buffer->update(instances.data(), instancesDataSize); + + // Top Level AS with single instance + if (instance_uid == std::numeric_limits::max()) // test if first time adding + { + instance_uid = top_level_acceleration_structure->add_instance_geometry(instances_buffer, static_cast(instances.size())); + } + else + { + top_level_acceleration_structure->update_instance_geometry(instance_uid, instances_buffer, static_cast(instances.size())); + } + top_level_acceleration_structure->build(queue); +} + +inline uint32_t aligned_size(uint32_t value, uint32_t alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + +namespace +{ +template +struct CopyBuffer +{ + std::vector operator()(std::unordered_map &buffers, const char *buffer_name) + { + auto iter = buffers.find(buffer_name); + if (iter == buffers.cend()) + { + return {}; + } + auto &buffer = iter->second; + std::vector out; + + const size_t sz = buffer.get_size(); + out.resize(sz / sizeof(T)); + const bool already_mapped = buffer.get_data() != nullptr; + if (!already_mapped) + { + 
buffer.map(); + } + memcpy(&out[0], buffer.get_data(), sz); + if (!already_mapped) + { + buffer.unmap(); + } + return out; + } +}; +} // namespace + +/* + Create scene geometry and ray tracing acceleration structures +*/ +void RaytracingInvocationReorder::create_scene() +{ + refraction_model.resize(grid_size * grid_size); + refraction_indices.resize(2 * grid_size * grid_size); + std::vector scenesToLoad; + const float sponza_scale = 0.01f; + const glm::mat4x4 sponza_transform{0.f, 0.f, sponza_scale, 0.f, + sponza_scale, 0.f, 0.f, 0.f, + 0.f, sponza_scale, 0.f, 0.f, + 0.f, 0.f, 0.f, 1.f}; + scenesToLoad.emplace_back("scenes/sponza/Sponza01.gltf", sponza_transform, ObjectType::OBJECT_NORMAL); + raytracing_scene = std::make_unique(get_device(), std::move(scenesToLoad)); + + create_flame_model(); + create_static_object_buffers(); + create_dynamic_object_buffers(0.f); + create_bottom_level_acceleration_structure(false); + top_level_acceleration_structure = std::make_unique(get_device(), VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); + create_top_level_acceleration_structure(); +} + +/* + Create the Shader Binding Tables that connects the ray tracing pipelines' programs and the top-level acceleration structure + + SBT Layout used in this sample: + + /-----------\ + | raygen | + |-----------| + | miss | + |-----------| + | hit | + \-----------/ +*/ + +void RaytracingInvocationReorder::create_shader_binding_tables() +{ + const uint32_t handle_size = ray_tracing_pipeline_properties.shaderGroupHandleSize; + const uint32_t handle_size_aligned = aligned_size(ray_tracing_pipeline_properties.shaderGroupHandleSize, ray_tracing_pipeline_properties.shaderGroupHandleAlignment); + auto group_count = static_cast(shader_groups.size()); + const uint32_t sbt_size = group_count * handle_size_aligned; + const VkBufferUsageFlags sbt_buffer_usage_flags = VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + const 
VmaMemoryUsage sbt_memory_usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + + // Number of hit groups (one per object type for SER demonstration) + const uint32_t hit_group_count = 3; // OBJECT_NORMAL, OBJECT_REFRACTION, OBJECT_FLAME + + // Create binding table buffers for each shader type + // Hit shader binding table needs space for all 3 hit groups + raygen_shader_binding_table = std::make_unique(get_device(), handle_size_aligned, sbt_buffer_usage_flags, sbt_memory_usage, 0); + miss_shader_binding_table = std::make_unique(get_device(), handle_size_aligned, sbt_buffer_usage_flags, sbt_memory_usage, 0); + hit_shader_binding_table = std::make_unique(get_device(), handle_size_aligned * hit_group_count, sbt_buffer_usage_flags, sbt_memory_usage, 0); + + // Copy the pipeline's shader handles into a host buffer + std::vector shader_handle_storage(sbt_size); + VK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(get_device().get_handle(), pipeline, 0, group_count, sbt_size, shader_handle_storage.data())); + + // Copy the shader handles from the host buffer to the binding tables + // Group 0: raygen + auto *data = static_cast(raygen_shader_binding_table->map()); + memcpy(data, shader_handle_storage.data(), handle_size); + raygen_shader_binding_table->unmap(); + + // Group 1: miss + data = static_cast(miss_shader_binding_table->map()); + memcpy(data, shader_handle_storage.data() + handle_size_aligned, handle_size); + miss_shader_binding_table->unmap(); + + // Groups 2-4: hit groups (OBJECT_NORMAL, OBJECT_REFRACTION, OBJECT_FLAME) + data = static_cast(hit_shader_binding_table->map()); + for (uint32_t i = 0; i < hit_group_count; ++i) + { + memcpy(data + handle_size_aligned * i, shader_handle_storage.data() + handle_size_aligned * (2 + i), handle_size); + } + hit_shader_binding_table->unmap(); +} + +/* + Create the descriptor sets used for the ray tracing dispatch +*/ +void RaytracingInvocationReorder::create_descriptor_sets() +{ + std::vector pool_sizes = { + 
{VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1}, + {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 5}, + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1}, + {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, static_cast(raytracing_scene->imageInfos.size())}}; + VkDescriptorPoolCreateInfo descriptor_pool_create_info = vkb::initializers::descriptor_pool_create_info(pool_sizes, 1); + VK_CHECK(vkCreateDescriptorPool(get_device().get_handle(), &descriptor_pool_create_info, nullptr, &descriptor_pool)); + + VkDescriptorSetAllocateInfo descriptor_set_allocate_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &descriptor_set_layout, 1); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &descriptor_set_allocate_info, &descriptor_set)); + + // Set up the descriptor for binding our top level acceleration structure to the ray tracing shaders + VkWriteDescriptorSetAccelerationStructureKHR descriptor_acceleration_structure_info{}; + descriptor_acceleration_structure_info.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR; + descriptor_acceleration_structure_info.accelerationStructureCount = 1; + auto rhs = top_level_acceleration_structure->get_handle(); + descriptor_acceleration_structure_info.pAccelerationStructures = &rhs; + + VkWriteDescriptorSet acceleration_structure_write{}; + acceleration_structure_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + acceleration_structure_write.dstSet = descriptor_set; + acceleration_structure_write.dstBinding = 0; + acceleration_structure_write.descriptorCount = 1; + acceleration_structure_write.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + // The acceleration structure descriptor has to be chained via pNext + acceleration_structure_write.pNext = &descriptor_acceleration_structure_info; + + VkDescriptorImageInfo image_descriptor{}; + image_descriptor.imageView = storage_image.view; + 
image_descriptor.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + VkDescriptorBufferInfo buffer_descriptor = create_descriptor(*ubo); + VkDescriptorBufferInfo vertex_descriptor = create_descriptor(*vertex_buffer); + VkDescriptorBufferInfo index_descriptor = create_descriptor(*index_buffer); + VkDescriptorBufferInfo dynamic_vertex_descriptor = create_descriptor(*dynamic_vertex_buffer); + VkDescriptorBufferInfo dynamic_index_descriptor = create_descriptor(*dynamic_index_buffer); + VkDescriptorBufferInfo data_map_descriptor = create_descriptor(*data_to_model_buffer); + + VkWriteDescriptorSet result_image_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, &image_descriptor); + VkWriteDescriptorSet uniform_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &buffer_descriptor); + VkWriteDescriptorSet vertex_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4, &vertex_descriptor); + VkWriteDescriptorSet index_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 5, &index_descriptor); + VkWriteDescriptorSet data_map_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 6, &data_map_descriptor); + VkWriteDescriptorSet texture_array_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 7, raytracing_scene->imageInfos.data(), static_cast(raytracing_scene->imageInfos.size())); + VkWriteDescriptorSet dynamic_vertex_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 8, &dynamic_vertex_descriptor); + VkWriteDescriptorSet dynamic_index_buffer_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 9, &dynamic_index_descriptor); + + std::vector write_descriptor_sets = { + 
acceleration_structure_write, + result_image_write, + uniform_buffer_write, + vertex_buffer_write, + index_buffer_write, + data_map_write, + texture_array_write, + dynamic_vertex_buffer_write, + dynamic_index_buffer_write}; + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0, VK_NULL_HANDLE); +} + +void RaytracingInvocationReorder::create_dynamic_object_buffers(float time) +{ + for (uint32_t i = 0; i < grid_size; ++i) + { + for (uint32_t j = 0; j < grid_size; ++j) + { + const float x = static_cast(i) / static_cast(grid_size); + const float y = static_cast(j) / static_cast(grid_size); + const float lateral_scale = std::min(std::min(std::min(std::min(x, 1 - x), y), 1 - y), 0.2f) * 5.f; + refraction_model[grid_size * i + j].normal = {0.f, 0.f, 0.f}; + refraction_model[grid_size * i + j].pos = {y - 0.5f, + 2 * x - 1.f, + lateral_scale * 0.025f * cos(2 * 3.14159 * (4 * x + time / 2))}; + refraction_model[grid_size * i + j].tex_coord = glm::vec2{x, y}; + + if (i + 1 < grid_size && j + 1 < grid_size) + { + refraction_indices[2 * (grid_size * i + j)] = Triangle{i * grid_size + j, (i + 1) * grid_size + j, i * grid_size + j + 1}; + refraction_indices[2 * (grid_size * i + j) + 1] = Triangle{(i + 1) * grid_size + j, (i + 1) * grid_size + j + 1, i * grid_size + j + 1}; + } + } + } + for (auto &&tri : refraction_indices) + { + glm::vec3 normal = glm::normalize(glm::cross(refraction_model[tri[1]].pos - refraction_model[tri[0]].pos, refraction_model[tri[2]].pos - refraction_model[tri[0]].pos)); + for (auto &&index : tri) + { + ASSERT_LOG(index >= 0 && index < refraction_model.size(), "Valid tri") + refraction_model[index].normal += normal; + } + } + + for (auto &&vert : refraction_model) + { + vert.normal = glm::normalize(vert.normal); + } + + size_t vertex_buffer_size = refraction_model.size() * sizeof(NewVertex); + size_t index_buffer_size = refraction_indices.size() * sizeof(refraction_indices[0]); + + 
if (!dynamic_vertex_buffer || !dynamic_index_buffer) + { + // note these flags are different because they will be read/write, in contrast to static + dynamic_vertex_buffer = std::make_unique(get_device(), vertex_buffer_size, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + dynamic_index_buffer = std::make_unique(get_device(), index_buffer_size, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + } + + dynamic_vertex_buffer->update(refraction_model.data(), vertex_buffer_size); + dynamic_index_buffer->update(refraction_indices.data(), index_buffer_size); + + auto assign_buffer = [&](ModelBuffer &buffer) { + buffer.vertex_offset = 0; + buffer.index_offset = 0; + buffer.is_static = false; + buffer.default_transform = VkTransformMatrixKHR{1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}; + buffer.num_vertices = refraction_model.size(); + buffer.num_triangles = refraction_indices.size(); + buffer.object_type = ObjectType::OBJECT_REFRACTION; + }; + bool found = false; + for (auto &&buffer : raytracing_scene->model_buffers) + { + if (buffer.object_type == OBJECT_REFRACTION) + { + assign_buffer(buffer); + found = true; + break; + } + } + if (!found) + { + ModelBuffer new_buffer; + assign_buffer(new_buffer); + raytracing_scene->model_buffers.emplace_back(std::move(new_buffer)); + } +} + +/* + Create our ray tracing pipeline +*/ +void RaytracingInvocationReorder::create_ray_tracing_pipeline() +{ + // Slot for binding top level acceleration structures to the ray generation shader + VkDescriptorSetLayoutBinding acceleration_structure_layout_binding{}; + acceleration_structure_layout_binding.binding = 0; + 
acceleration_structure_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; + acceleration_structure_layout_binding.descriptorCount = 1; + acceleration_structure_layout_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR; + + VkDescriptorSetLayoutBinding result_image_layout_binding{}; + result_image_layout_binding.binding = 1; + result_image_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + result_image_layout_binding.descriptorCount = 1; + result_image_layout_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR; + + VkDescriptorSetLayoutBinding uniform_buffer_binding{}; + uniform_buffer_binding.binding = 2; + uniform_buffer_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniform_buffer_binding.descriptorCount = 1; + uniform_buffer_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR; + + // Pass render mode constant + struct SpecialConsts_s + { + uint32_t renderMode = RenderMode::RENDER_DEFAULT; + uint32_t maxRays = 60; + } specialConsts; + std::vector specializationMapEntries; + specializationMapEntries.push_back(vkb::initializers::specialization_map_entry(0, offsetof(SpecialConsts_s, renderMode), sizeof(uint32_t))); + specializationMapEntries.push_back(vkb::initializers::specialization_map_entry(1, offsetof(SpecialConsts_s, maxRays), sizeof(uint32_t))); + VkSpecializationInfo specializationInfo = vkb::initializers::specialization_info( + static_cast(specializationMapEntries.size()), &specializationMapEntries.front(), sizeof(SpecialConsts_s), &specialConsts); + + VkDescriptorSetLayoutBinding vertex_binding{}; + vertex_binding.binding = 4; + vertex_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + vertex_binding.descriptorCount = 1; + vertex_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + + VkDescriptorSetLayoutBinding index_binding{}; + index_binding.binding = 5; + index_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + 
index_binding.descriptorCount = 1; + index_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + + VkDescriptorSetLayoutBinding data_map_binding{}; + data_map_binding.binding = 6; + data_map_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + data_map_binding.descriptorCount = 1; + data_map_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + + VkDescriptorSetLayoutBinding texture_array_binding{}; + texture_array_binding.binding = 7; + texture_array_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + texture_array_binding.descriptorCount = static_cast(raytracing_scene->imageInfos.size()); + texture_array_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + + VkDescriptorSetLayoutBinding dynamic_vertex_binding{}; + dynamic_vertex_binding.binding = 8; + dynamic_vertex_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + dynamic_vertex_binding.descriptorCount = 1; + dynamic_vertex_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + + VkDescriptorSetLayoutBinding dynamic_index_binding{}; + dynamic_index_binding.binding = 9; + dynamic_index_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + dynamic_index_binding.descriptorCount = 1; + dynamic_index_binding.stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR | VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR; + + std::vector bindings = { + acceleration_structure_layout_binding, + result_image_layout_binding, + uniform_buffer_binding, + vertex_binding, + index_binding, + data_map_binding, + texture_array_binding, + dynamic_vertex_binding, + dynamic_index_binding}; + + VkDescriptorSetLayoutCreateInfo layout_info{}; + layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layout_info.bindingCount = static_cast(bindings.size()); + layout_info.pBindings = bindings.data(); + 
VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &layout_info, nullptr, &descriptor_set_layout)); + + VkPipelineLayoutCreateInfo pipeline_layout_create_info{}; + pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_create_info.setLayoutCount = 1; + pipeline_layout_create_info.pSetLayouts = &descriptor_set_layout; + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &pipeline_layout_create_info, nullptr, &pipeline_layout)); + + /* + Setup ray tracing shader groups + Each shader group points at the corresponding shader in the pipeline + */ + std::vector shader_stages; + + // Force to use slang due to glslc not currently in public SDK supporting GL_EXT_shader_invocation_reorder; Remove when glslc supports. + set_shading_language(vkb::ShadingLanguage::SLANG); + + // Ray generation group + { + shader_stages.push_back(load_shader("ray_tracing_invocation_reorder", "raygen.rgen.spv", VK_SHADER_STAGE_RAYGEN_BIT_KHR)); + shader_stages.back().pSpecializationInfo = &specializationInfo; + VkRayTracingShaderGroupCreateInfoKHR raygen_group_ci{}; + raygen_group_ci.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + raygen_group_ci.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR; + raygen_group_ci.generalShader = static_cast(shader_stages.size()) - 1; + raygen_group_ci.closestHitShader = VK_SHADER_UNUSED_KHR; + raygen_group_ci.anyHitShader = VK_SHADER_UNUSED_KHR; + raygen_group_ci.intersectionShader = VK_SHADER_UNUSED_KHR; + shader_groups.push_back(raygen_group_ci); + } + + // Ray miss group + { + shader_stages.push_back(load_shader("ray_tracing_invocation_reorder", "miss.rmiss.spv", VK_SHADER_STAGE_MISS_BIT_KHR)); + VkRayTracingShaderGroupCreateInfoKHR miss_group_ci{}; + miss_group_ci.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + miss_group_ci.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR; + miss_group_ci.generalShader = static_cast(shader_stages.size()) - 
1; + miss_group_ci.closestHitShader = VK_SHADER_UNUSED_KHR; + miss_group_ci.anyHitShader = VK_SHADER_UNUSED_KHR; + miss_group_ci.intersectionShader = VK_SHADER_UNUSED_KHR; + shader_groups.push_back(miss_group_ci); + } + + // Ray closest hit groups - one per object type for SER demonstration + // SER benefits from shader divergence - different shaders for different materials + // Hit group 0: OBJECT_NORMAL (diffuse surfaces with textures) + { + shader_stages.push_back(load_shader("ray_tracing_invocation_reorder", "closesthit_normal.rchit.spv", VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)); + shader_stages.back().pSpecializationInfo = &specializationInfo; + VkRayTracingShaderGroupCreateInfoKHR hit_group_ci{}; + hit_group_ci.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + hit_group_ci.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + hit_group_ci.generalShader = VK_SHADER_UNUSED_KHR; + hit_group_ci.closestHitShader = static_cast(shader_stages.size()) - 1; + hit_group_ci.anyHitShader = VK_SHADER_UNUSED_KHR; + hit_group_ci.intersectionShader = VK_SHADER_UNUSED_KHR; + shader_groups.push_back(hit_group_ci); + } + + // Hit group 1: OBJECT_REFRACTION (glass/refractive surfaces) + { + shader_stages.push_back(load_shader("ray_tracing_invocation_reorder", "closesthit_refraction.rchit.spv", VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)); + shader_stages.back().pSpecializationInfo = &specializationInfo; + VkRayTracingShaderGroupCreateInfoKHR hit_group_ci{}; + hit_group_ci.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + hit_group_ci.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + hit_group_ci.generalShader = VK_SHADER_UNUSED_KHR; + hit_group_ci.closestHitShader = static_cast(shader_stages.size()) - 1; + hit_group_ci.anyHitShader = VK_SHADER_UNUSED_KHR; + hit_group_ci.intersectionShader = VK_SHADER_UNUSED_KHR; + shader_groups.push_back(hit_group_ci); + } + + // Hit group 2: OBJECT_FLAME (emission/flame surfaces) + { + 
shader_stages.push_back(load_shader("ray_tracing_invocation_reorder", "closesthit_flame.rchit.spv", VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR)); + shader_stages.back().pSpecializationInfo = &specializationInfo; + VkRayTracingShaderGroupCreateInfoKHR hit_group_ci{}; + hit_group_ci.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR; + hit_group_ci.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR; + hit_group_ci.generalShader = VK_SHADER_UNUSED_KHR; + hit_group_ci.closestHitShader = static_cast(shader_stages.size()) - 1; + hit_group_ci.anyHitShader = VK_SHADER_UNUSED_KHR; + hit_group_ci.intersectionShader = VK_SHADER_UNUSED_KHR; + shader_groups.push_back(hit_group_ci); + } + + /* + Create the ray tracing pipeline + */ + VkRayTracingPipelineCreateInfoKHR raytracing_pipeline_create_info{}; + raytracing_pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR; + raytracing_pipeline_create_info.stageCount = static_cast(shader_stages.size()); + raytracing_pipeline_create_info.pStages = shader_stages.data(); + raytracing_pipeline_create_info.groupCount = static_cast(shader_groups.size()); + raytracing_pipeline_create_info.pGroups = shader_groups.data(); + raytracing_pipeline_create_info.maxPipelineRayRecursionDepth = 1; + raytracing_pipeline_create_info.layout = pipeline_layout; + VK_CHECK(vkCreateRayTracingPipelinesKHR(get_device().get_handle(), VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &raytracing_pipeline_create_info, nullptr, &pipeline)); +} + +#ifndef USE_FRAMEWORK_ACCELERATION_STRUCTURE +/* + Deletes all resources acquired by an acceleration structure +*/ +void RaytracingInvocationReorder::delete_acceleration_structure(AccelerationStructureExtended &acceleration_structure) +{ + if (acceleration_structure.buffer) + { + acceleration_structure.buffer.reset(); + } + if (acceleration_structure.handle) + { + vkDestroyAccelerationStructureKHR(get_device().get_handle(), acceleration_structure.handle, nullptr); + } +} +#endif + +/* 
+ Create the uniform buffer used to pass matrices to the ray tracing ray generation shader +*/ +void RaytracingInvocationReorder::create_uniform_buffer() +{ + ubo = std::make_unique(get_device(), + sizeof(uniform_data), + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + ubo->convert_and_update(uniform_data); + update_uniform_buffers(); +} + +/* + Command buffer generation +*/ +void RaytracingInvocationReorder::build_command_buffers() +{ + if (width != storage_image.width || height != storage_image.height) + { + // If the view port size has changed, we need to recreate the storage image + vkDestroyImageView(get_device().get_handle(), storage_image.view, nullptr); + vkDestroyImage(get_device().get_handle(), storage_image.image, nullptr); + vkFreeMemory(get_device().get_handle(), storage_image.memory, nullptr); + create_storage_image(); + // The descriptor also needs to be updated to reference the new image + VkDescriptorImageInfo image_descriptor{}; + image_descriptor.imageView = storage_image.view; + image_descriptor.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + VkWriteDescriptorSet result_image_write = vkb::initializers::write_descriptor_set(descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1, &image_descriptor); + vkUpdateDescriptorSets(get_device().get_handle(), 1, &result_image_write, 0, VK_NULL_HANDLE); + } + + VkCommandBufferBeginInfo command_buffer_begin_info = vkb::initializers::command_buffer_begin_info(); + + auto device_ptr = get_device().get_handle(); + auto command_pool = get_device().get_command_pool().get_handle(); + if (!raytracing_command_buffers.empty()) + { + vkFreeCommandBuffers(device_ptr, command_pool, static_cast(raytracing_command_buffers.size()), &raytracing_command_buffers[0]); + raytracing_command_buffers.resize(0); + } + + raytracing_command_buffers.resize(draw_cmd_buffers.size()); + for (auto &&command_buffer : raytracing_command_buffers) + { + command_buffer = 
get_device().create_command_buffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, false); + } + + for (auto &raytracing_command_buffer : raytracing_command_buffers) + { + VK_CHECK(vkBeginCommandBuffer(raytracing_command_buffer, &command_buffer_begin_info)); + + /* + Set up the stride device address regions pointing at the shader identifiers in the shader binding table + */ + + const uint32_t handle_size_aligned = aligned_size(ray_tracing_pipeline_properties.shaderGroupHandleSize, ray_tracing_pipeline_properties.shaderGroupHandleAlignment); + + VkStridedDeviceAddressRegionKHR raygen_shader_sbt_entry{}; + raygen_shader_sbt_entry.deviceAddress = get_buffer_device_address(raygen_shader_binding_table->get_handle()); + raygen_shader_sbt_entry.stride = handle_size_aligned; + raygen_shader_sbt_entry.size = handle_size_aligned; + + VkStridedDeviceAddressRegionKHR miss_shader_sbt_entry{}; + miss_shader_sbt_entry.deviceAddress = get_buffer_device_address(miss_shader_binding_table->get_handle()); + miss_shader_sbt_entry.stride = handle_size_aligned; + miss_shader_sbt_entry.size = handle_size_aligned; + + // Hit shader SBT entry - contains 3 hit groups (one per object type for SER) + const uint32_t hit_group_count = 3; // OBJECT_NORMAL, OBJECT_REFRACTION, OBJECT_FLAME + VkStridedDeviceAddressRegionKHR hit_shader_sbt_entry{}; + hit_shader_sbt_entry.deviceAddress = get_buffer_device_address(hit_shader_binding_table->get_handle()); + hit_shader_sbt_entry.stride = handle_size_aligned; + hit_shader_sbt_entry.size = handle_size_aligned * hit_group_count; + + VkStridedDeviceAddressRegionKHR callable_shader_sbt_entry{}; + + std::vector barriers; + for (auto &&model_buffer : raytracing_scene->model_buffers) + { + if (!model_buffer.is_static) + { + VkBufferMemoryBarrier barrier = vkb::initializers::buffer_memory_barrier(); + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | 
VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR; + barrier.buffer = model_buffer.bottom_level_acceleration_structure->get_buffer()->get_handle(); + barrier.size = model_buffer.bottom_level_acceleration_structure->get_buffer()->get_size(); + barriers.push_back(barrier); + } + } + + auto getBufferBarrier = [](const vkb::core::BufferC &buffer) { + VkBufferMemoryBarrier barrier = vkb::initializers::buffer_memory_barrier(); + barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.buffer = buffer.get_handle(); + barrier.size = buffer.get_size(); + return barrier; + }; + barriers.emplace_back(getBufferBarrier(*dynamic_vertex_buffer)); + barriers.emplace_back(getBufferBarrier(*dynamic_index_buffer)); + barriers.emplace_back(getBufferBarrier(*instances_buffer)); + barriers.emplace_back(getBufferBarrier(*ubo)); + + vkCmdPipelineBarrier(raytracing_command_buffer, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_HOST_BIT, 0, + 0, VK_NULL_HANDLE, // memory barrier + static_cast(barriers.size()), barriers.data(), // buffer memory barrier + 0, VK_NULL_HANDLE); // image memory barrier + + /* + Dispatch the ray tracing commands + */ + vkCmdBindPipeline(raytracing_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline); + vkCmdBindDescriptorSets(raytracing_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pipeline_layout, 0, 1, &descriptor_set, 0, nullptr); + + vkCmdTraceRaysKHR( + raytracing_command_buffer, + &raygen_shader_sbt_entry, + &miss_shader_sbt_entry, + &hit_shader_sbt_entry, + &callable_shader_sbt_entry, + width, + height, + 1); + + VK_CHECK(vkEndCommandBuffer(raytracing_command_buffer)); + } +} + +void RaytracingInvocationReorder::update_uniform_buffers() +{ + uniform_data.proj_inverse = glm::inverse(camera.matrices.perspective); + uniform_data.view_inverse = glm::inverse(camera.matrices.view); + uniform_data.enable_ser = 
ser_enabled ? 1 : 0; + uniform_data.use_coherence_hint = coherence_hint_enabled ? 1 : 0; + uniform_data.time = static_cast(std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start).count()) / 1000000.0f; + ubo->convert_and_update(uniform_data); +} + +bool RaytracingInvocationReorder::prepare(const vkb::ApplicationOptions &options) +{ + if (!ApiVulkanSample::prepare(options)) + { + return false; + } + + // This sample copies the ray traced output to the swap chain image, so we need to enable the required image usage flags + std::set image_usage_flags = {VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, VK_IMAGE_USAGE_TRANSFER_DST_BIT}; + get_render_context().update_swapchain(image_usage_flags); + + // Update render pass to load (preserve) color attachment content since we copy the ray traced image first + // Note: ApiVulkanSample::prepare() already called setup_render_pass() and setup_framebuffer() + // After update_swapchain(), we need to update swapchain_buffers, render_pass, and framebuffers + // to reference the new swapchain images + create_swapchain_buffers(); + update_render_pass_flags(RenderPassCreateFlags::ColorAttachmentLoad); + setup_framebuffer(); + + // Get the ray tracing pipeline properties, which we'll need later on in the sample + ray_tracing_pipeline_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR; + VkPhysicalDeviceProperties2 device_properties{}; + device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + device_properties.pNext = &ray_tracing_pipeline_properties; + vkGetPhysicalDeviceProperties2(get_device().get_gpu().get_handle(), &device_properties); + + // Get the acceleration structure features, which we'll need later on in the sample + acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR; + VkPhysicalDeviceFeatures2 device_features{}; + device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + 
device_features.pNext = &acceleration_structure_features; + vkGetPhysicalDeviceFeatures2(get_device().get_gpu().get_handle(), &device_features); + + // Query SER properties to check if device supports reordering + VkPhysicalDeviceProperties2 device_properties_reorder{}; + device_properties_reorder.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + + VkRayTracingInvocationReorderModeNV reorder_hint; + if (using_nv_extension) + { + invocation_reorder_properties_nv.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_NV; + device_properties_reorder.pNext = &invocation_reorder_properties_nv; + vkGetPhysicalDeviceProperties2(get_device().get_gpu().get_handle(), &device_properties_reorder); + reorder_hint = invocation_reorder_properties_nv.rayTracingInvocationReorderReorderingHint; + } +#ifdef VK_EXT_ray_tracing_invocation_reorder + else + { + invocation_reorder_properties_ext.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_EXT; + device_properties_reorder.pNext = &invocation_reorder_properties_ext; + vkGetPhysicalDeviceProperties2(get_device().get_gpu().get_handle(), &device_properties_reorder); + reorder_hint = invocation_reorder_properties_ext.rayTracingInvocationReorderReorderingHint; + } +#endif + + // Check if device can actually reorder (not just provide hit objects) + ser_supported = (reorder_hint == VK_RAY_TRACING_INVOCATION_REORDER_MODE_REORDER_NV); + if (ser_supported) + { + LOGI("Shader Execution Reordering is supported and can reorder invocations"); + } + else + { + LOGI("Hit objects are supported, but invocation reordering is not available (no-op on this device)"); + } + + camera.type = vkb::CameraType::FirstPerson; + camera.set_perspective(60.0f, static_cast(width) / static_cast(height), 0.1f, 512.0f); + camera.set_rotation(glm::vec3(0.0f, 0.0f, 0.0f)); + camera.set_translation(glm::vec3(0.0f, 1.5f, 0.f)); + + create_storage_image(); + create_scene(); + create_uniform_buffer(); + 
create_ray_tracing_pipeline(); + create_shader_binding_tables(); + create_descriptor_sets(); + build_command_buffers(); + + prepared = true; + return true; +} + +void RaytracingInvocationReorder::draw() +{ + get_device().get_fence_pool().wait(); + get_device().get_fence_pool().reset(); + ASSERT_LOG(raytracing_command_buffers.size() == draw_cmd_buffers.size(), "The number of raytracing command buffers must match the render queue size") + ApiVulkanSample::prepare_frame(); + size_t i = current_buffer; + + VkSubmitInfo submit = vkb::initializers::submit_info(); + submit.commandBufferCount = 1; + submit.pCommandBuffers = &raytracing_command_buffers[i]; + + VK_CHECK(vkQueueSubmit(queue, 1, &submit, get_device().get_fence_pool().request_fence())); + get_device().get_fence_pool().wait(); + + recreate_current_command_buffer(); + VkCommandBufferBeginInfo begin = vkb::initializers::command_buffer_begin_info(); + VK_CHECK(vkBeginCommandBuffer(draw_cmd_buffers[i], &begin)); + + VkImageSubresourceRange subresource_range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}; + /* + Copy ray tracing output to swap chain image + */ + // Prepare current swap chain image as transfer destination + vkb::image_layout_transition(draw_cmd_buffers[i], + get_render_context().get_swapchain().get_images()[i], + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + {}, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + subresource_range); + + // Prepare ray tracing output image as transfer source + vkb::image_layout_transition(draw_cmd_buffers[i], + storage_image.image, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + {}, + VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + subresource_range); + + VkImageCopy copy_region{}; + copy_region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + copy_region.srcOffset = {0, 0, 0}; + 
copy_region.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + copy_region.dstOffset = {0, 0, 0}; + copy_region.extent = {width, height, 1}; + vkCmdCopyImage(draw_cmd_buffers[i], storage_image.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + get_render_context().get_swapchain().get_images()[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); + + // Transition swap chain image for render pass (render pass expects PRESENT_SRC_KHR as initial layout) + vkb::image_layout_transition(draw_cmd_buffers[i], + get_render_context().get_swapchain().get_images()[i], + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + subresource_range); + + // Transition ray tracing output image back to general layout + vkb::image_layout_transition(draw_cmd_buffers[i], + storage_image.image, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_ACCESS_TRANSFER_READ_BIT, + {}, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_IMAGE_LAYOUT_GENERAL, + subresource_range); + + // Begin render pass for UI rendering + // Note: Even though we use ColorAttachmentLoad, the depth attachment still uses CLEAR, + // so we must provide clear values for both attachments + VkClearValue clear_values[2]; + clear_values[0].color = default_clear_color; + clear_values[1].depthStencil = {1.0f, 0}; + + VkRenderPassBeginInfo render_pass_begin_info = vkb::initializers::render_pass_begin_info(); + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.framebuffer = framebuffers[i]; + render_pass_begin_info.renderArea.offset.x = 0; + render_pass_begin_info.renderArea.offset.y = 0; + render_pass_begin_info.renderArea.extent.width = width; + render_pass_begin_info.renderArea.extent.height = height; + render_pass_begin_info.clearValueCount = 2; + render_pass_begin_info.pClearValues = clear_values; + + 
vkCmdBeginRenderPass(draw_cmd_buffers[i], &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); + + draw_ui(draw_cmd_buffers[i]); + + vkCmdEndRenderPass(draw_cmd_buffers[i]); + + VK_CHECK(vkEndCommandBuffer(draw_cmd_buffers[i])); + + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &draw_cmd_buffers[current_buffer]; + VK_CHECK(vkQueueSubmit(queue, 1, &submit_info, get_device().get_fence_pool().request_fence())); + get_device().get_fence_pool().wait(); + ApiVulkanSample::submit_frame(); +} + +void RaytracingInvocationReorder::render(float delta_time) +{ + if (!prepared) + { + return; + } + frame_count = (frame_count + 1) % 60; + bool print_time = !frame_count; + auto time = std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start); + flame_generator.update_particles(delta_time); + create_dynamic_object_buffers(static_cast(time.count()) / 1000.f / 1000.f); + create_bottom_level_acceleration_structure(true, print_time); + create_top_level_acceleration_structure(print_time); + update_overlay(delta_time, []() {}); + draw(); + if (camera.updated) + { + update_uniform_buffers(); + } +} + +void RaytracingInvocationReorder::on_update_ui_overlay(vkb::Drawer &drawer) +{ + if (ser_supported) + { + if (drawer.checkbox("Enable Shader Execution Reordering (SER)", &ser_enabled)) + { + update_uniform_buffers(); + } + if (drawer.checkbox("Enable Coherence Hint", &coherence_hint_enabled)) + { + update_uniform_buffers(); + } + } +} + +std::unique_ptr create_ray_tracing_invocation_reorder() +{ + return std::make_unique(); +} + +RaytracingInvocationReorder::RaytracingScene::RaytracingScene(vkb::core::DeviceC &device, const std::vector &scenesToLoad) +{ + vkb::GLTFLoader loader{device}; + scenes.resize(scenesToLoad.size()); + for (size_t sceneIndex = 0; sceneIndex < scenesToLoad.size(); ++sceneIndex) + { + scenes[sceneIndex] = loader.read_scene_from_file(scenesToLoad[sceneIndex].filename); + ASSERT_LOG(scenes[sceneIndex], "Cannot load file") + auto 
&scene = scenes[sceneIndex]; + assert(!!scene); + for (auto &&mesh : scene->get_components()) + { + for (auto &&sub_mesh : mesh->get_submeshes()) + { + auto material = sub_mesh->get_material(); + auto &textures = material->textures; + size_t textureIndex = std::numeric_limits::max(); + auto baseTextureIter = textures.find("base_color_texture"); + bool is_vase = false; + if (baseTextureIter != textures.cend()) + { + auto texture = baseTextureIter->second; + if (!texture) + { + continue; + } + + const auto name = texture->get_image()->get_name(); + is_vase = (name.find("vase_dif.ktx") != std::basic_string::npos); + textureIndex = imageInfos.size(); + auto image = texture->get_image(); + VkDescriptorImageInfo imageInfo; + imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + imageInfo.imageView = image->get_vk_image_view().get_handle(); + imageInfo.sampler = baseTextureIter->second->get_sampler()->get_core_sampler().get_handle(); + imageInfos.push_back(imageInfo); + } + + auto pts_ = CopyBuffer{}(sub_mesh->vertex_buffers, "position"); + const auto UV_coords = CopyBuffer{}(sub_mesh->vertex_buffers, "texcoord_0"); + const auto normals_ = CopyBuffer{}(sub_mesh->vertex_buffers, "normal"); + + auto transform = scenesToLoad[sceneIndex].transform; + if (is_vase) + { + const float sponza_scale = 0.01f; + transform = glm::mat3x4{0.f, 0.f, sponza_scale, 4.3f, + sponza_scale, 0.f, 0.f, 0.f, + 0.f, sponza_scale, 0.f, 9.5f}; + } + for (auto &&pt : pts_) + { + const auto translation = glm::vec3(transform[0][3], transform[1][3], transform[2][3]); + pt = glm::vec3(glm::mat4(transform) * glm::vec4(pt, 1.f)) + translation; + } + + assert(textureIndex < std::numeric_limits::max()); + const auto textureIndex32 = static_cast(textureIndex); + Model model; + model.vertices.resize(pts_.size()); + for (size_t i = 0; i < pts_.size(); ++i) + { + auto tex_coords = i < UV_coords.size() ? UV_coords[i] : glm::vec2{}; + auto normal = i < normals_.size() ? 
normals_[i] : glm::vec3{}; + model.vertices[i].pos = pts_[i]; + model.vertices[i].normal = normal; + model.vertices[i].tex_coord = tex_coords; + } + + assert(sub_mesh->index_type == VK_INDEX_TYPE_UINT16); + auto buffer = sub_mesh->index_buffer.get(); + if (buffer) + { + const size_t sz = buffer->get_size(); + const size_t nTriangles = sz / sizeof(uint16_t) / 3; + model.triangles.resize(nTriangles); + auto ptr = buffer->get_data(); + assert(!!ptr); + std::vector tempBuffer(nTriangles * 3); + memcpy(&tempBuffer[0], ptr, sz); + for (size_t i = 0; i < nTriangles; ++i) + { + model.triangles[i] = {static_cast(tempBuffer[3 * i]), + static_cast(tempBuffer[3 * i + 1]), + static_cast(tempBuffer[3 * i + 2])}; + } + } + + model.default_transform = VkTransformMatrixKHR{1.f, 0.f, 0.f, 0.f, + 0.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f}; + model.texture_index = textureIndex32; + model.object_type = scenesToLoad[sceneIndex].object_type; + models.emplace_back(std::move(model)); + } + } + } +} diff --git a/samples/extensions/ray_tracing_invocation_reorder/ray_tracing_invocation_reorder.h b/samples/extensions/ray_tracing_invocation_reorder/ray_tracing_invocation_reorder.h new file mode 100644 index 000000000..6772e5b48 --- /dev/null +++ b/samples/extensions/ray_tracing_invocation_reorder/ray_tracing_invocation_reorder.h @@ -0,0 +1,284 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Demonstrates Shader Execution Reordering (SER) using VK_EXT_ray_tracing_invocation_reorder + * Shows how to use hit objects and reorderThreadEXT() to reduce divergence in ray tracing + */ + +#pragma once + +#define USE_FRAMEWORK_ACCELERATION_STRUCTURE + +#include "api_vulkan_sample.h" +#include + +class RaytracingInvocationReorder : public ApiVulkanSample +{ + public: + VkPhysicalDeviceRayTracingPipelinePropertiesKHR ray_tracing_pipeline_properties{}; + VkPhysicalDeviceAccelerationStructureFeaturesKHR acceleration_structure_features{}; +#ifdef VK_EXT_ray_tracing_invocation_reorder + VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT invocation_reorder_properties_ext{}; +#endif + VkPhysicalDeviceRayTracingInvocationReorderPropertiesNV invocation_reorder_properties_nv{}; + bool using_nv_extension = false; + bool ser_enabled = true; + bool coherence_hint_enabled = true; + bool ser_supported = false; + + enum RenderMode : uint32_t + { + RENDER_DEFAULT = 0, + RENDER_BARYCENTRIC = 1, + RENDER_INSTANCE_ID = 2, + RENDER_DISTANCE = 3, + RENDER_GLOBAL_XYZ = 4, + RENDER_SHADOW_MAP = 5, + RENDER_AO = 6 + }; + + enum ObjectType : uint32_t + { + OBJECT_NORMAL, // has AO and ray traced shadows + OBJECT_REFRACTION, // pass-through with IOR + OBJECT_FLAME // emission surface; constant amplitude + }; + + struct NewVertex; + struct Model; + + struct FlameParticle + { + glm::vec3 position; + glm::vec3 velocity; + float duration = 0.f; + }; + + struct FlameParticleGenerator + { + FlameParticleGenerator() = default; + + FlameParticleGenerator(glm::vec3 generator_origin, glm::vec3 generator_direction, float generator_radius, size_t n_particles) : + origin(generator_origin), direction(generator_direction), radius(generator_radius), n_particles(n_particles), generator(std::chrono::system_clock::now().time_since_epoch().count()) + { + using namespace glm; + u = normalize(abs(dot(generator_direction, vec3(0, 0, 1))) > 0.9f ? 
cross(generator_direction, vec3(1, 0, 0)) : cross(generator_direction, vec3(0, 0, 1))); + v = normalize(cross(generator_direction, u)); + + for (size_t i = 0; i < n_particles; ++i) + { + float starting_lifetime = generate_random() * lifetime; + particles.emplace_back(generateParticle(starting_lifetime)); + } + } + ~FlameParticleGenerator() = default; + FlameParticle generateParticle(float _lifetime = 0.f) const + { + using namespace glm; + const float theta = 2.f * 3.14159f * generate_random(); + const float R = radius * generate_random(); + const vec3 velocity_direction = generate_random_direction(); + + FlameParticle particle; + particle.position = origin + R * (sin(theta) * u + cos(theta) * v); + particle.velocity = generate_random() * 0.2f * velocity_direction; + particle.duration = _lifetime; + return particle; + } + glm::vec3 generate_random_direction() const + { + using namespace glm; + return normalize(0.2f * generate_random() * u + 0.2f * generate_random() * v + 0.8f * direction * generate_random()); + } + void update_particles(float time_delta) + { + particles.erase(std::remove_if(particles.begin(), particles.end(), [this, lifetime{this->lifetime}](const FlameParticle &particle) { + return particle.duration > (generate_random() * lifetime); + }), + particles.end()); + + for (auto &&particle : particles) + { + particle.position += time_delta * particle.velocity; + // particle.velocity = 0.75f * particle.velocity + 0.25f * generate_random_direction(); + particle.duration += time_delta; + } + + for (size_t i = particles.size(); i < n_particles; ++i) + { + particles.emplace_back(generateParticle(0.f)); + } + } + + float generate_random() const + { + std::uniform_real_distribution distribution = std::uniform_real_distribution(0, 1); + return distribution(generator); + } + + mutable std::default_random_engine generator; + std::vector particles; + glm::vec3 origin = {0, 0, 0}; + glm::vec3 direction = {0, 0, 0}; + glm::vec3 u = {0, 0, 0}, v = {0, 0, 0}; + float 
lifetime = 5; + float radius = 0.f; + size_t n_particles = 0; + }; + + FlameParticleGenerator flame_generator; + + struct ModelBuffer + { + size_t vertex_offset = std::numeric_limits::max(); // in bytes + size_t index_offset = std::numeric_limits::max(); // in bytes + size_t num_vertices = std::numeric_limits::max(); + size_t num_triangles = std::numeric_limits::max(); + uint32_t texture_index = std::numeric_limits::max(); + std::unique_ptr transform_matrix_buffer = nullptr; + VkAccelerationStructureBuildSizesInfoKHR buildSize; + VkAccelerationStructureGeometryKHR acceleration_structure_geometry; + VkAccelerationStructureBuildRangeInfoKHR buildRangeInfo; + std::unique_ptr bottom_level_acceleration_structure = nullptr; + VkTransformMatrixKHR default_transform; + uint32_t object_type = 0; + bool is_static = true; + uint64_t object_id = 0; + }; + + struct SceneOptions + { + bool use_vertex_staging_buffer = true; + } scene_options; + size_t frame_count = 0; + std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); + + // fixed buffers + std::unique_ptr vertex_buffer = nullptr; + std::unique_ptr index_buffer = nullptr; + std::unique_ptr dynamic_vertex_buffer = nullptr; + std::unique_ptr dynamic_index_buffer = nullptr; + std::unique_ptr instances_buffer = nullptr; + + struct SceneLoadInfo + { + SceneLoadInfo() = default; + SceneLoadInfo(const char *filename, glm::mat3x4 transform, uint32_t object_type) : + filename(filename), transform(transform), object_type(object_type) + {} + const char *filename = ""; + glm::mat3x4 transform; + uint32_t object_type = 0; + }; + + struct RaytracingScene + { + RaytracingScene() = default; + ~RaytracingScene() = default; + RaytracingScene(vkb::core::DeviceC &device, const std::vector &scenesToLoad); + std::vector> scenes; + std::vector imageInfos; + std::vector models; + std::vector model_buffers; + }; + + std::unique_ptr raytracing_scene; + Texture flame_texture; + +#ifdef 
USE_FRAMEWORK_ACCELERATION_STRUCTURE + std::unique_ptr top_level_acceleration_structure = nullptr; +#else + AccelerationStructureExtended top_level_acceleration_structure; +#endif + uint64_t instance_uid = std::numeric_limits::max(); + uint32_t index_count; + std::vector shader_groups{}; + + std::unique_ptr raygen_shader_binding_table; + std::unique_ptr miss_shader_binding_table; + std::unique_ptr hit_shader_binding_table; + + struct StorageImage + { + VkDeviceMemory memory; + VkImage image = VK_NULL_HANDLE; + VkImageView view; + VkFormat format; + uint32_t width; + uint32_t height; + StorageImage() : + memory(VK_NULL_HANDLE), image(VK_NULL_HANDLE), view(VK_NULL_HANDLE), format(), width(0), height(0) + {} + } storage_image; + + struct UniformData + { + glm::mat4 view_inverse; + glm::mat4 proj_inverse; + int32_t enable_ser; + int32_t use_coherence_hint; + float time; + } uniform_data; + std::unique_ptr ubo; + + struct SceneInstanceData + { + uint32_t vertex_index; // index of first data + uint32_t indices_index; + uint32_t image_index; + uint32_t object_type; // controls how shader handles object / whether to load from buffer for static objects or dynamic objects + }; + std::unique_ptr data_to_model_buffer; + + std::vector raytracing_command_buffers; + VkPipeline pipeline; + VkPipelineLayout pipeline_layout; + VkDescriptorSet descriptor_set; + VkDescriptorSetLayout descriptor_set_layout; + using Triangle = std::array; + uint32_t grid_size = 100; + std::vector refraction_model; + std::vector refraction_indices; + + RaytracingInvocationReorder(); + ~RaytracingInvocationReorder() override; + + void request_gpu_features(vkb::core::PhysicalDeviceC &gpu) override; + uint64_t get_buffer_device_address(VkBuffer buffer); + void create_storage_image(); + void create_static_object_buffers(); + void create_flame_model(); + void create_dynamic_object_buffers(float time); + void create_bottom_level_acceleration_structure(bool is_update, bool print_time = true); + 
VkTransformMatrixKHR calculate_rotation(glm::vec3 pt, float scale = 1.f, bool freeze_y = false); + void create_top_level_acceleration_structure(bool print_time = true); + + void create_scene(); + void create_shader_binding_tables(); + void create_descriptor_sets(); + void create_ray_tracing_pipeline(); + void create_uniform_buffer(); + void build_command_buffers() override; + void update_uniform_buffers(); + void draw(); + void on_update_ui_overlay(vkb::Drawer &drawer) override; + bool prepare(const vkb::ApplicationOptions &options) override; + void render(float delta_time) override; +}; + +std::unique_ptr create_ray_tracing_invocation_reorder(); diff --git a/shaders/ray_tracing_invocation_reorder/glsl/closesthit_flame.rchit b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_flame.rchit new file mode 100644 index 000000000..8331ae4a6 --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_flame.rchit @@ -0,0 +1,157 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#version 460
#extension GL_EXT_ray_tracing : enable
#extension GL_EXT_nonuniform_qualifier : enable

// Closest-hit shader for OBJECT_FLAME (type 2) - emission/flame surfaces
// This shader is intentionally separate to create shader divergence for SER demonstration

#define RENDER_DEFAULT 0
#define RENDER_BARYCENTRIC 1
#define RENDER_INSTANCE_ID 2
#define RENDER_DISTANCE 3
#define RENDER_GLOBAL_XYZ 4
#define RENDER_SHADOW_MAP 5
#define RENDER_AO 6

// Size of the texture array bound at binding 7; also the bound used to validate image indices
#define TEXTURE_COUNT 26

struct Payload
{
    vec4 color;
    vec4 intersection; // {x, y, z, intersectionType}
    vec4 normal;       // {nx, ny, nz, distance}
};

layout(location = 0) rayPayloadInEXT Payload hitValue;
hitAttributeEXT vec3 attribs;

layout(binding=4, set = 0) readonly buffer VertexBuffer
{
    vec4[] data;
} vertex_buffer;

layout(binding=5, set = 0) readonly buffer IndexBuffer
{
    uint[] indices;
} index_buffer;

layout(binding=6, set = 0) readonly buffer DataMap
{
    uint[] indices;
} data_map;

layout(binding=7, set = 0) uniform sampler2D textures[TEXTURE_COUNT];

layout (constant_id = 0) const uint render_mode = RENDER_DEFAULT;

// Maps value (clamped to [minValue, maxValue]) onto a color ramp
vec3 heatmap(float value, float minValue, float maxValue)
{
    float scaled = (clamp(value, minValue, maxValue) - minValue) / (maxValue - minValue);
    float r = scaled * (3.14159265359 / 2.);
    return vec3(sin(r), sin(2 * r), cos(r));
}

struct Vertex
{
    vec3 pt;
    vec3 normal;
    vec2 coordinate;
};

// Each vertex occupies two consecutive vec4s: {pos.xyz, normal.x} {normal.yz, uv.xy}
Vertex getVertex(uint vertexOffset, uint index)
{
    uint base_index = 2 * (vertexOffset + index);
    vec4 A = vertex_buffer.data[base_index];
    vec4 B = vertex_buffer.data[base_index + 1];

    Vertex v;
    v.pt = A.xyz;
    v.normal = vec3(A.w, B.x, B.y);
    v.coordinate = vec2(B.z, B.w);
    return v;
}

// Returns the three vertex indices of the given primitive
uvec3 getIndices(uint triangle_offset, uint primitive_id)
{
    uint base_index = 3 * (triangle_offset + primitive_id);
    uint index0 = index_buffer.indices[base_index];
    uint index1 = index_buffer.indices[base_index + 1];
    uint index2 = index_buffer.indices[base_index + 2];

    return uvec3(index0, index1, index2);
}

void main()
{
    const vec3 barycentricCoords = vec3(1.0f - attribs.x - attribs.y, attribs.x, attribs.y);

    // Debug render modes that do not need any geometry data
    if (render_mode == RENDER_BARYCENTRIC) {
        hitValue.color = vec4(barycentricCoords, 1);
        return;
    } else if (render_mode == RENDER_INSTANCE_ID) {
        hitValue.color = vec4(heatmap(gl_InstanceCustomIndexEXT, 0, 25), 1);
        return;
    } else if (render_mode == RENDER_DISTANCE) {
        hitValue.color = vec4(heatmap(log(1 + gl_HitTEXT), 0, log(1 + 25)), 1);
        return;
    }

    uint index = gl_InstanceCustomIndexEXT;

    // The data map stores four uints per instance: vertex offset, triangle offset, image index, object type
    uint vertexOffset = data_map.indices[4 * index];
    uint triangleOffset = data_map.indices[4 * index + 1];
    uint imageOffset = data_map.indices[4 * index + 2];
    const uint objectType = 2; // OBJECT_FLAME

    uvec3 indices = getIndices(triangleOffset, gl_PrimitiveID);
    Vertex A = getVertex(vertexOffset, indices.x);
    Vertex B = getVertex(vertexOffset, indices.y);
    Vertex C = getVertex(vertexOffset, indices.z);

    // interpolate and obtain world point
    float alpha = barycentricCoords.x, beta = barycentricCoords.y, gamma = barycentricCoords.z;
    vec3 worldPt = gl_WorldRayOriginEXT + gl_HitTEXT * gl_WorldRayDirectionEXT;
    vec3 worldNormal = normalize(cross(B.pt - A.pt, C.pt - A.pt));

    vec2 texcoord = alpha * A.coordinate + beta * B.coordinate + gamma * C.coordinate;

    hitValue.intersection = vec4(worldPt.xyz, objectType);
    hitValue.normal = vec4(worldNormal.xyz, gl_HitTEXT);

    if (render_mode == RENDER_GLOBAL_XYZ) {
        hitValue.color = vec4(heatmap(worldPt.x, -10, 10), 1);
        return;
    }

    // Flame/emission material - texture lookup with emission boost
    // This is different computation than diffuse or refraction, creating shader divergence
    // NOTE(review): when imageOffset is out of range hitValue.color is left untouched - confirm
    // that the ray generation shader initializes the payload color before tracing
    if (imageOffset < TEXTURE_COUNT) {
        vec4 tex_value = textureLod(textures[nonuniformEXT(imageOffset)], texcoord, 0);

        // Emission computation - flames glow and don't receive shadows
        // Add some procedural flame flickering based on position
        float flicker = 0.8 + 0.2 * sin(worldPt.x * 10.0 + worldPt.y * 15.0);

        // Boost emission intensity
        float emission = 2.0 * flicker;

        // Flame color with emission
        hitValue.color = vec4(tex_value.rgb * emission, tex_value.a);
    }
}
+ */ + +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_nonuniform_qualifier : enable + +// Closest-hit shader for OBJECT_NORMAL (type 0) - diffuse surfaces with textures +// This shader is intentionally separate to create shader divergence for SER demonstration + +#define RENDER_DEFAULT 0 +#define RENDER_BARYCENTRIC 1 +#define RENDER_INSTANCE_ID 2 +#define RENDER_DISTANCE 3 +#define RENDER_GLOBAL_XYZ 4 +#define RENDER_SHADOW_MAP 5 +#define RENDER_AO 6 + +struct Payload +{ + vec4 color; + vec4 intersection; // {x, y, z, intersectionType} + vec4 normal; // {nx, ny, nz, distance} +}; + +layout(location = 0) rayPayloadInEXT Payload hitValue; +hitAttributeEXT vec3 attribs; + +layout(binding=4, set = 0) readonly buffer VertexBuffer +{ + vec4[] data; +} vertex_buffer; + +layout(binding=5, set = 0) readonly buffer IndexBuffer +{ + uint[] indices; +} index_buffer; + +layout(binding=6, set = 0) readonly buffer DataMap +{ + uint[] indices; +} data_map; + +layout(binding=7, set = 0) uniform sampler2D textures[26]; + +layout (constant_id = 0) const uint render_mode = RENDER_DEFAULT; + +vec3 heatmap(float value, float minValue, float maxValue) +{ + float scaled = (min(max(value, minValue), maxValue) - minValue) / (maxValue - minValue); + float r = scaled * (3.14159265359 / 2.); + return vec3(sin(r), sin(2 * r), cos(r)); +} + +struct Vertex +{ + vec3 pt; + vec3 normal; + vec2 coordinate; +}; + +Vertex getVertex(uint vertexOffset, uint index) +{ + uint base_index = 2 * (vertexOffset + index); + vec4 A = vertex_buffer.data[base_index]; + vec4 B = vertex_buffer.data[base_index + 1]; + + Vertex v; + v.pt = A.xyz; + v.normal = vec3(A.w, B.x, B.y); + v.coordinate = vec2(B.z, B.w); + return v; +} + +uvec3 getIndices(uint triangle_offset, uint primitive_id) +{ + uint base_index = 3 * (triangle_offset + primitive_id); + uint index0 = index_buffer.indices[base_index]; + uint index1 = index_buffer.indices[base_index + 1]; + uint index2 = 
index_buffer.indices[base_index + 2]; + + return uvec3(index0, index1, index2); +} + +void main() +{ + const vec3 barycentricCoords = vec3(1.0f - attribs.x - attribs.y, attribs.x, attribs.y); + + if (render_mode == RENDER_BARYCENTRIC) { + hitValue.color = vec4(barycentricCoords, 1); + return; + } else if (render_mode == RENDER_INSTANCE_ID) { + hitValue.color = vec4(heatmap(gl_InstanceCustomIndexEXT, 0, 25), 1); + return; + } else if (render_mode == RENDER_DISTANCE) { + hitValue.color = vec4(heatmap(log(1 + gl_HitTEXT), 0, log(1 + 25)), 1); + return; + } + + uint index = gl_InstanceCustomIndexEXT; + + uint vertexOffset = data_map.indices[4 * index]; + uint triangleOffset = data_map.indices[4*index + 1]; + uint imageOffset = data_map.indices[4 * index + 2]; + const uint objectType = 0; // OBJECT_NORMAL + + uvec3 indices = getIndices(triangleOffset, gl_PrimitiveID); + Vertex A = getVertex(vertexOffset, indices.x); + Vertex B = getVertex(vertexOffset, indices.y); + Vertex C = getVertex(vertexOffset, indices.z); + + // interpolate and obtain world point + float alpha = barycentricCoords.x, beta = barycentricCoords.y, gamma = barycentricCoords.z; + vec3 pt = alpha * A.pt + beta * B.pt + gamma * C.pt; + vec3 worldPt = gl_WorldRayOriginEXT + gl_HitTEXT * gl_WorldRayDirectionEXT; + vec3 worldNormal = normalize(cross(B.pt - A.pt, C.pt - A.pt)); + + vec2 texcoord = alpha * A.coordinate + beta * B.coordinate + gamma * C.coordinate; + + hitValue.intersection = vec4(worldPt.xyz, objectType); + hitValue.normal = vec4(worldNormal.xyz, gl_HitTEXT); + + if (render_mode == RENDER_GLOBAL_XYZ) { + hitValue.color = vec4(heatmap(worldPt.x, -10, 10), 1); + return; + } + + // Diffuse texture lookup with additional computation to create divergence + if (imageOffset < 26) { + vec4 tex_value = textureLod(textures[nonuniformEXT(imageOffset)], texcoord, 0); + + // Additional diffuse shading computation (creates work divergence from other shaders) + float diffuse = max(dot(worldNormal, 
normalize(vec3(0, -1, 0))), 0.0); + float ambient = 0.3; + hitValue.color = tex_value * (ambient + 0.7 * diffuse); + } +} diff --git a/shaders/ray_tracing_invocation_reorder/glsl/closesthit_normal.rchit.spv b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_normal.rchit.spv new file mode 100644 index 000000000..5a041fa52 Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_normal.rchit.spv differ diff --git a/shaders/ray_tracing_invocation_reorder/glsl/closesthit_refraction.rchit b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_refraction.rchit new file mode 100644 index 000000000..eefdb74a0 --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_refraction.rchit @@ -0,0 +1,162 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_nonuniform_qualifier : enable + +// Closest-hit shader for OBJECT_REFRACTION (type 1) - glass/refractive surfaces +// This shader is intentionally separate to create shader divergence for SER demonstration + +#define RENDER_DEFAULT 0 +#define RENDER_BARYCENTRIC 1 +#define RENDER_INSTANCE_ID 2 +#define RENDER_DISTANCE 3 +#define RENDER_GLOBAL_XYZ 4 +#define RENDER_SHADOW_MAP 5 +#define RENDER_AO 6 + +struct Payload +{ + vec4 color; + vec4 intersection; // {x, y, z, intersectionType} + vec4 normal; // {nx, ny, nz, distance} +}; + +layout(location = 0) rayPayloadInEXT Payload hitValue; +hitAttributeEXT vec3 attribs; + +layout(binding=4, set = 0) readonly buffer VertexBuffer +{ + vec4[] data; +} vertex_buffer; + +layout(binding=5, set = 0) readonly buffer IndexBuffer +{ + uint[] indices; +} index_buffer; + +layout(binding=6, set = 0) readonly buffer DataMap +{ + uint[] indices; +} data_map; + +layout(binding=8, set = 0) readonly buffer DynamicVertexBuffer +{ + vec4[] data; +} dynamic_vertex_buffer; + +layout(binding=9, set = 0) readonly buffer DynamicIndexBuffer +{ + uint[] indices; +} dynamic_index_buffer; + +layout (constant_id = 0) const uint render_mode = RENDER_DEFAULT; + +vec3 heatmap(float value, float minValue, float maxValue) +{ + float scaled = (min(max(value, minValue), maxValue) - minValue) / (maxValue - minValue); + float r = scaled * (3.14159265359 / 2.); + return vec3(sin(r), sin(2 * r), cos(r)); +} + +struct Vertex +{ + vec3 pt; + vec3 normal; + vec2 coordinate; +}; + +Vertex getVertex(uint vertexOffset, uint index) +{ + // OBJECT_REFRACTION uses dynamic buffers (is_static = false when objectType == 1) + uint base_index = 2 * (vertexOffset + index); + vec4 A = dynamic_vertex_buffer.data[base_index]; + vec4 B = dynamic_vertex_buffer.data[base_index + 1]; + + Vertex v; + v.pt = A.xyz; + v.normal = vec3(A.w, B.x, B.y); + v.coordinate = vec2(B.z, B.w); + return v; +} + +uvec3 
getIndices(uint triangle_offset, uint primitive_id) +{ + uint base_index = 3 * (triangle_offset + primitive_id); + uint index0 = dynamic_index_buffer.indices[base_index]; + uint index1 = dynamic_index_buffer.indices[base_index + 1]; + uint index2 = dynamic_index_buffer.indices[base_index + 2]; + + return uvec3(index0, index1, index2); +} + +void main() +{ + const vec3 barycentricCoords = vec3(1.0f - attribs.x - attribs.y, attribs.x, attribs.y); + + if (render_mode == RENDER_BARYCENTRIC) { + hitValue.color = vec4(barycentricCoords, 1); + return; + } else if (render_mode == RENDER_INSTANCE_ID) { + hitValue.color = vec4(heatmap(gl_InstanceCustomIndexEXT, 0, 25), 1); + return; + } else if (render_mode == RENDER_DISTANCE) { + hitValue.color = vec4(heatmap(log(1 + gl_HitTEXT), 0, log(1 + 25)), 1); + return; + } + + uint index = gl_InstanceCustomIndexEXT; + + uint vertexOffset = data_map.indices[4 * index]; + uint triangleOffset = data_map.indices[4*index + 1]; + const uint objectType = 1; // OBJECT_REFRACTION + + uvec3 indices = getIndices(triangleOffset, gl_PrimitiveID); + Vertex A = getVertex(vertexOffset, indices.x); + Vertex B = getVertex(vertexOffset, indices.y); + Vertex C = getVertex(vertexOffset, indices.z); + + // interpolate and obtain world point + float alpha = barycentricCoords.x, beta = barycentricCoords.y, gamma = barycentricCoords.z; + vec3 pt = alpha * A.pt + beta * B.pt + gamma * C.pt; + vec3 worldPt = gl_WorldRayOriginEXT + gl_HitTEXT * gl_WorldRayDirectionEXT; + vec3 worldNormal = normalize(cross(B.pt - A.pt, C.pt - A.pt)); + + vec2 texcoord = alpha * A.coordinate + beta * B.coordinate + gamma * C.coordinate; + + hitValue.intersection = vec4(worldPt.xyz, objectType); + hitValue.normal = vec4(worldNormal.xyz, gl_HitTEXT); + + if (render_mode == RENDER_GLOBAL_XYZ) { + hitValue.color = vec4(heatmap(worldPt.x, -10, 10), 1); + return; + } + + // Refraction/glass material - compute IOR based on position + // This is different computation than diffuse, 
creating shader divergence + const float base_IOR = 1.01; + const float x = texcoord.x, y = texcoord.y; + const float t = min(min(min(min(x, 1-x), y), 1-y), 0.5) / 0.5; + const float IOR = t * base_IOR + (1 - t) * 1; + + // Additional Fresnel computation for glass (creates more divergence) + float cosTheta = abs(dot(worldNormal, normalize(gl_WorldRayDirectionEXT))); + float fresnel = pow(1.0 - cosTheta, 5.0); + + hitValue.color = vec4(IOR, fresnel, 0, 0); +} diff --git a/shaders/ray_tracing_invocation_reorder/glsl/closesthit_refraction.rchit.spv b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_refraction.rchit.spv new file mode 100644 index 000000000..34492bf70 Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/glsl/closesthit_refraction.rchit.spv differ diff --git a/shaders/ray_tracing_invocation_reorder/glsl/miss.rmiss b/shaders/ray_tracing_invocation_reorder/glsl/miss.rmiss new file mode 100644 index 000000000..3da57f44d --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/glsl/miss.rmiss @@ -0,0 +1,37 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#version 460 +#extension GL_EXT_ray_tracing : enable + +struct Payload +{ + vec4 color; + vec4 intersection; // {x, y, z, intersectionType} + vec4 normal; // {nx, ny, nz, distance} +}; + +layout(location = 0) rayPayloadInEXT Payload hitValue; + +void main() +{ + // Simple gradient background + vec3 skyColor = mix(vec3(0.3, 0.5, 0.8), vec3(0.1, 0.2, 0.4), gl_WorldRayDirectionEXT.y * 0.5 + 0.5); + hitValue.color = vec4(skyColor, 1.0); + hitValue.intersection.w = 100; // mark miss + hitValue.normal.w = 10000; // large distance +} diff --git a/shaders/ray_tracing_invocation_reorder/glsl/miss.rmiss.spv b/shaders/ray_tracing_invocation_reorder/glsl/miss.rmiss.spv new file mode 100644 index 000000000..70a76242b Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/glsl/miss.rmiss.spv differ diff --git a/shaders/ray_tracing_invocation_reorder/glsl/raygen.rgen b/shaders/ray_tracing_invocation_reorder/glsl/raygen.rgen new file mode 100644 index 000000000..55371a59d --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/glsl/raygen.rgen @@ -0,0 +1,195 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#version 460 +#extension GL_EXT_ray_tracing : enable +#extension GL_EXT_shader_invocation_reorder : enable + +#define RENDER_DEFAULT 0 +#define RENDER_BARYCENTRIC 1 +#define RENDER_INSTANCE_ID 2 +#define RENDER_DISTANCE 3 +#define RENDER_GLOBAL_XYZ 4 +#define RENDER_SHADOW_MAP 5 +#define RENDER_AO 6 + +layout(binding = 0, set = 0) uniform accelerationStructureEXT topLevelAS; +layout(binding = 1, set = 0, rgba8) uniform image2D image; +layout(binding = 2, set = 0) uniform CameraProperties +{ + mat4 viewInverse; + mat4 projInverse; + int enableSER; + int useCoherenceHint; + float time; +} cam; + +struct Payload +{ + vec4 color; + vec4 intersection; // {x, y, z, intersectionType} + vec4 normal; // {nx, ny, nz, distance} +}; + +layout(location = 0) rayPayloadEXT Payload hitValue; +layout (constant_id = 0) const uint render_mode = RENDER_DEFAULT; +layout (constant_id = 1) const uint maxRays = 12; + +void main() +{ + const vec2 pixelCenter = vec2(gl_LaunchIDEXT.xy) + vec2(0.5); + const vec2 inUV = pixelCenter/vec2(gl_LaunchSizeEXT.xy); + vec2 d = inUV * 2.0 - 1.0; + + vec4 origin = cam.viewInverse * vec4(0,0,0,1); + vec4 target = cam.projInverse * vec4(d.x, d.y, 1, 1) ; + vec4 direction = cam.viewInverse*vec4(normalize(target.xyz), 0) ; + + float tmin = 0.001; + float tmax = 10000.0; + + uint max_rays = maxRays; + if (render_mode != RENDER_DEFAULT) + { + max_rays = 1; + } + + uint object_type = 100; + vec4 color = vec4(0, 0, 0, 0); + // 0 = normal, 1 = shadow, 2 = AO + uint current_mode = 0; + float expectedDistance = -1; + + // Primary ray — when SER is enabled, reorder with hit objects for the first trace + if (cam.enableSER != 0) + { + hitObjectEXT hitObj; + hitObjectRecordEmptyEXT(hitObj); + hitObjectTraceRayEXT(hitObj, topLevelAS, gl_RayFlagsOpaqueEXT, 0xff, 0, 0, 0, origin.xyz, tmin, direction.xyz, tmax, 0); + if (cam.useCoherenceHint != 0) + { + uint hint = 0; + if (hitObjectIsHitEXT(hitObj)) + { + hint = hitObjectGetInstanceIdEXT(hitObj); + } +
reorderThreadEXT(hitObj, hint, 8); + } + else + { + reorderThreadEXT(hitObj); + } + hitObjectExecuteShaderEXT(hitObj, 0); + object_type = uint(hitValue.intersection.w); + } + else + { + traceRayEXT(topLevelAS, gl_RayFlagsOpaqueEXT, 0xff, 0, 0, 0, origin.xyz, tmin, direction.xyz, tmax, 0); + object_type = uint(hitValue.intersection.w); + } + + // Follow-on rays from extended sample (shadow and AO), left as-is without SER + const vec3 object_intersection_pt = hitValue.intersection.xyz; + const vec3 object_normal = hitValue.normal.xyz; + if (render_mode != RENDER_DEFAULT && render_mode != RENDER_SHADOW_MAP && render_mode != RENDER_AO) // shadow-map and AO views are produced inside the object_type == 0 branch below, so they must not be short-circuited here + { + color = hitValue.color; + } + else if (object_type == 0) + { + vec4 newColor = hitValue.color; // saved first: the follow-on traces below overwrite hitValue + // shadow + { + const float shadow_mult = 2; + const float shadow_scale = 0.25; + vec3 lightPt = vec3(0, -20, 0); + vec3 currentDirection = lightPt - hitValue.intersection.xyz; + expectedDistance = sqrt(dot(currentDirection, currentDirection)); + currentDirection = normalize(currentDirection); + traceRayEXT(topLevelAS, gl_RayFlagsOpaqueEXT, 0xff, 0, 0, 0, object_intersection_pt, tmin, currentDirection, tmax, 0); + float actDistance = hitValue.normal.w; + float scale = actDistance < expectedDistance ? shadow_scale : 1; + scale = min(scale * shadow_mult, 1); + newColor.xyz *= scale; + current_mode = 101; + if (render_mode == RENDER_SHADOW_MAP) + { + imageStore(image, ivec2(gl_LaunchIDEXT.xy), vec4(scale, scale, scale, 1)); + return; + } + } + // ambient occlusion + { + const float ao_mult = 1; + uint max_ao_each = 2; // 2x2=4 AO rays + uint max_ao = max_ao_each * max_ao_each; + const float max_dist = 2; + float accumulated_ao = 0.f; + vec3 u = abs(dot(object_normal, vec3(0, 0, 1))) > 0.9 ?
cross(object_normal, vec3(1, 0, 0)) : cross(object_normal, vec3(0, 0, 1)); + vec3 v = cross(object_normal, u); + float accumulated_factor = 0; + for (uint j = 0; j < max_ao_each; ++j) + { + float phi = 0.5*(-3.14159 + 2 * 3.14159 * (float(j + 1) / float(max_ao_each + 2))); + for (uint k = 0; k < max_ao_each; ++k){ + float theta = 0.5*(-3.14159 + 2 * 3.14159 * (float(k + 1) / float(max_ao_each + 2))); + float x = cos(phi) * sin(theta); + float y = sin(phi) * sin(theta); + float z = cos(theta); + vec3 dir2 = x * u + y * v + z * object_normal; + traceRayEXT(topLevelAS, gl_RayFlagsOpaqueEXT, 0xff, 0, 0, 0, object_intersection_pt, tmin, dir2, tmax, 0); + float ao = min(hitValue.normal.w, max_dist); + float factor = 0.2 + 0.8 * z * z; + accumulated_factor += factor; + accumulated_ao += ao * factor; + } + } + accumulated_ao /= (max_dist * accumulated_factor); + accumulated_ao *= accumulated_ao; + accumulated_ao = max(min((accumulated_ao) * ao_mult, 1), 0); + if (render_mode == RENDER_AO) + { + imageStore(image, ivec2(gl_LaunchIDEXT.xy), vec4(accumulated_ao, accumulated_ao, accumulated_ao, 1)); + return; + } + newColor.xyz *= accumulated_ao; + const float r = max(0, 1 - color.a); + color += r * vec4(newColor.rgb, 1); + } + } + else if (object_type == 1) + { + // Refractive / reflective path (simplified): just output the first-hit color + color = hitValue.color; + } + else if (object_type == 2) + { + // Flame particle with alpha blending (matches extended sample) + vec4 newColor = hitValue.color; + float r = 1 - color.a; + color.rgb += r * newColor.rgb * newColor.a; + color.a += 0.1 * r * newColor.a; + } + + // Override for non-default render modes (but not for flame particles which need alpha); shadow-map and AO views of type-0 hits have already been stored and returned above + if (render_mode != RENDER_DEFAULT && object_type != 2) + { + color = hitValue.color; + } + + imageStore(image, ivec2(gl_LaunchIDEXT.xy), color); +} diff --git a/shaders/ray_tracing_invocation_reorder/glsl/raygen.rgen.spv b/shaders/ray_tracing_invocation_reorder/glsl/raygen.rgen.spv
new file mode 100644 index 000000000..31d5dbe36 Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/glsl/raygen.rgen.spv differ diff --git a/shaders/ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.slang b/shaders/ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.slang new file mode 100644 index 000000000..64f8f01a5 --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.slang @@ -0,0 +1,141 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define RENDER_DEFAULT 0 +#define RENDER_BARYCENTRIC 1 +#define RENDER_INSTANCE_ID 2 +#define RENDER_DISTANCE 3 +#define RENDER_GLOBAL_XYZ 4 +#define RENDER_SHADOW_MAP 5 +#define RENDER_AO 6 + +struct Payload +{ + float4 color; + float4 intersection; // {x, y, z, intersectionType} + float4 normal; // {nx, ny, nz, distance} +}; + +struct Attributes +{ + float2 bary; +}; + +// Buffers/textures layout must match descriptor set layout (set=0, bindings 4..9) +StructuredBuffer<float4> vertex_buffer : register(t4); // two float4 per vertex, unpacked by getVertex +StructuredBuffer<uint> index_buffer : register(t5); // three indices per triangle, read by getIndices +StructuredBuffer<uint> data_map : register(t6); // four uints per instance: vertex offset, triangle offset, image offset +Texture2D textures[26] : register(t7); +SamplerState samplers[26] : register(s7); + +[[vk::constant_id(0)]] const uint render_mode = RENDER_DEFAULT; + +float3 heatmap(float value, float minValue, float maxValue) +{ + float scaled = (min(max(value, minValue), maxValue) - minValue) / (maxValue - minValue); + float r = scaled * (3.14159265359 / 2.0); + return float3(sin(r), sin(2.0 * r), cos(r)); +} + +struct Vertex +{ + float3 pt; + float3 normal; + float2 coordinate; +}; + +Vertex getVertex(uint vertexOffset, uint index) +{ + uint base_index = 2 * (vertexOffset + index); + float4 A = vertex_buffer[base_index]; + float4 B = vertex_buffer[base_index + 1]; + + Vertex v; + v.pt = A.xyz; + v.normal = float3(A.w, B.x, B.y); + v.coordinate = float2(B.z, B.w); + return v; +} + +uint3 getIndices(uint triangle_offset, uint primitive_id) +{ + uint base_index = 3 * (triangle_offset + primitive_id); + uint index0 = index_buffer[base_index]; + uint index1 = index_buffer[base_index + 1]; + uint index2 = index_buffer[base_index + 2]; + return uint3(index0, index1, index2); +} + +[shader("closesthit")] +void main(inout Payload hitValue, in Attributes attribs) +{ + const float3 barycentricCoords = float3(1.0f - attribs.bary.x - attribs.bary.y, attribs.bary.x, attribs.bary.y); + + if (render_mode == RENDER_BARYCENTRIC) + { + hitValue.color = float4(barycentricCoords, 1); + return; + } + else
if (render_mode == RENDER_INSTANCE_ID) + { + hitValue.color = float4(heatmap(InstanceID(), 0, 25), 1); + return; + } + else if (render_mode == RENDER_DISTANCE) + { + hitValue.color = float4(heatmap(log(1 + RayTCurrent()), 0, log(1 + 25)), 1); + return; + } + + uint index = InstanceID(); + + uint vertexOffset = data_map[4 * index]; + uint triangleOffset = data_map[4 * index + 1]; + uint imageOffset = data_map[4 * index + 2]; + const uint objectType = 2; // OBJECT_FLAME + + uint3 indices = getIndices(triangleOffset, PrimitiveIndex()); + Vertex A = getVertex(vertexOffset, indices.x); + Vertex B = getVertex(vertexOffset, indices.y); + Vertex C = getVertex(vertexOffset, indices.z); + + // interpolate and obtain world point + float alpha = barycentricCoords.x, beta = barycentricCoords.y, gamma = barycentricCoords.z; + float3 pt = alpha * A.pt + beta * B.pt + gamma * C.pt; + float3 worldPt = WorldRayOrigin() + RayTCurrent() * WorldRayDirection(); + float3 worldNormal = normalize(cross(B.pt - A.pt, C.pt - A.pt)); + + float2 texcoord = alpha * A.coordinate + beta * B.coordinate + gamma * C.coordinate; + + hitValue.intersection = float4(worldPt.xyz, objectType); + hitValue.normal = float4(worldNormal.xyz, RayTCurrent()); + + if (render_mode == RENDER_GLOBAL_XYZ) + { + hitValue.color = float4(heatmap(worldPt.x, -10, 10), 1); + return; + } + + // Flame/emission material - texture lookup with emission boost + if (imageOffset < 26) + { + float4 tex_value = textures[NonUniformResourceIndex(imageOffset)].SampleLevel(samplers[NonUniformResourceIndex(imageOffset)], texcoord, 0); + float flicker = 0.8 + 0.2 * sin(worldPt.x * 10.0 + worldPt.y * 15.0); + float emission = 2.0 * flicker; + hitValue.color = float4(tex_value.rgb * emission, tex_value.a); + } +} diff --git a/shaders/ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.spv b/shaders/ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.spv new file mode 100644 index 000000000..3c6341eb5 Binary files /dev/null 
and b/shaders/ray_tracing_invocation_reorder/slang/closesthit_flame.rchit.spv differ diff --git a/shaders/ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.slang b/shaders/ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.slang new file mode 100644 index 000000000..b94e99360 --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.slang @@ -0,0 +1,141 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define RENDER_DEFAULT 0 +#define RENDER_BARYCENTRIC 1 +#define RENDER_INSTANCE_ID 2 +#define RENDER_DISTANCE 3 +#define RENDER_GLOBAL_XYZ 4 +#define RENDER_SHADOW_MAP 5 +#define RENDER_AO 6 + +struct Payload +{ + float4 color; + float4 intersection; // {x, y, z, intersectionType} + float4 normal; // {nx, ny, nz, distance} +}; + +struct Attributes +{ + float2 bary; +}; + +// Buffers/textures layout must match descriptor set layout in the sample (set=0, bindings 4..9) +StructuredBuffer<float4> vertex_buffer : register(t4); // two float4 per vertex, unpacked by getVertex +StructuredBuffer<uint> index_buffer : register(t5); // three indices per triangle, read by getIndices +StructuredBuffer<uint> data_map : register(t6); // four uints per instance: vertex offset, triangle offset, image offset +Texture2D textures[26] : register(t7); +SamplerState samplers[26] : register(s7); + +[[vk::constant_id(0)]] const uint render_mode = RENDER_DEFAULT; + +float3 heatmap(float value, float minValue, float maxValue) +{ + float scaled = (min(max(value, minValue), maxValue) - minValue) / (maxValue - minValue); + float r = scaled * (3.14159265359 / 2.0); + return float3(sin(r), sin(2.0 * r), cos(r)); +} + +struct Vertex +{ + float3 pt; + float3 normal; + float2 coordinate; +}; + +Vertex getVertex(uint vertexOffset, uint index) +{ + uint base_index = 2 * (vertexOffset + index); + float4 A = vertex_buffer[base_index]; + float4 B = vertex_buffer[base_index + 1]; + + Vertex v; + v.pt = A.xyz; + v.normal = float3(A.w, B.x, B.y); + v.coordinate = float2(B.z, B.w); + return v; +} + +uint3 getIndices(uint triangle_offset, uint primitive_id) +{ + uint base_index = 3 * (triangle_offset + primitive_id); + uint index0 = index_buffer[base_index]; + uint index1 = index_buffer[base_index + 1]; + uint index2 = index_buffer[base_index + 2]; + return uint3(index0, index1, index2); +} + +[shader("closesthit")] +void main(inout Payload hitValue, in Attributes attribs) +{ + const float3 barycentricCoords = float3(1.0f - attribs.bary.x - attribs.bary.y, attribs.bary.x, attribs.bary.y); + + if (render_mode == RENDER_BARYCENTRIC) + { + hitValue.color = float4(barycentricCoords, 1); +
return; + } + else if (render_mode == RENDER_INSTANCE_ID) + { + hitValue.color = float4(heatmap(InstanceID(), 0, 25), 1); + return; + } + else if (render_mode == RENDER_DISTANCE) + { + hitValue.color = float4(heatmap(log(1 + RayTCurrent()), 0, log(1 + 25)), 1); + return; + } + + uint index = InstanceID(); + + uint vertexOffset = data_map[4 * index]; + uint triangleOffset = data_map[4 * index + 1]; + uint imageOffset = data_map[4 * index + 2]; + const uint objectType = 0; // OBJECT_NORMAL + + uint3 indices = getIndices(triangleOffset, PrimitiveIndex()); + Vertex A = getVertex(vertexOffset, indices.x); + Vertex B = getVertex(vertexOffset, indices.y); + Vertex C = getVertex(vertexOffset, indices.z); + + // interpolate and obtain world point + float alpha = barycentricCoords.x, beta = barycentricCoords.y, gamma = barycentricCoords.z; + float3 pt = alpha * A.pt + beta * B.pt + gamma * C.pt; + float3 worldPt = WorldRayOrigin() + RayTCurrent() * WorldRayDirection(); + float3 worldNormal = normalize(cross(B.pt - A.pt, C.pt - A.pt)); + + float2 texcoord = alpha * A.coordinate + beta * B.coordinate + gamma * C.coordinate; + + hitValue.intersection = float4(worldPt.xyz, objectType); + hitValue.normal = float4(worldNormal.xyz, RayTCurrent()); + + if (render_mode == RENDER_GLOBAL_XYZ) + { + hitValue.color = float4(heatmap(worldPt.x, -10, 10), 1); + return; + } + + // Diffuse texture lookup with simple lighting for divergence + if (imageOffset < 26) + { + float4 tex_value = textures[NonUniformResourceIndex(imageOffset)].SampleLevel(samplers[NonUniformResourceIndex(imageOffset)], texcoord, 0); + float diffuse = max(dot(worldNormal, normalize(float3(0, -1, 0))), 0.0); + float ambient = 0.3; + hitValue.color = tex_value * (ambient + 0.7 * diffuse); + } +} diff --git a/shaders/ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.spv b/shaders/ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.spv new file mode 100644 index 000000000..8a3c8b313 Binary files 
/dev/null and b/shaders/ray_tracing_invocation_reorder/slang/closesthit_normal.rchit.spv differ diff --git a/shaders/ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.slang b/shaders/ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.slang new file mode 100644 index 000000000..27c58c778 --- /dev/null +++ b/shaders/ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.slang @@ -0,0 +1,139 @@ +/* Copyright (c) 2025-2026, Holochip Inc. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define RENDER_DEFAULT 0 +#define RENDER_BARYCENTRIC 1 +#define RENDER_INSTANCE_ID 2 +#define RENDER_DISTANCE 3 +#define RENDER_GLOBAL_XYZ 4 +#define RENDER_SHADOW_MAP 5 +#define RENDER_AO 6 + +struct Payload +{ + float4 color; + float4 intersection; // {x, y, z, intersectionType} + float4 normal; // {nx, ny, nz, distance} +}; + +struct Attributes +{ + float2 bary; +}; + +// Buffers layout (set=0, bindings 4..9) +StructuredBuffer<float4> dynamic_vertex_buffer : register(t8); // two float4 per vertex, unpacked by getVertex +StructuredBuffer<uint> dynamic_index_buffer : register(t9); // three indices per triangle, read by getIndices +StructuredBuffer<uint> data_map : register(t6); // four uints per instance; this shader reads the vertex and triangle offsets + +[[vk::constant_id(0)]] const uint render_mode = RENDER_DEFAULT; + +float3 heatmap(float value, float minValue, float maxValue) +{ + float scaled = (min(max(value, minValue), maxValue) - minValue) / (maxValue - minValue); + float r = scaled * (3.14159265359 / 2.0); + return float3(sin(r), sin(2.0 * r), cos(r)); +} + +struct Vertex +{ + float3 pt; + float3 normal; + float2 coordinate; +}; + +Vertex getVertex(uint vertexOffset, uint index) +{ + uint base_index = 2 * (vertexOffset + index); + float4 A = dynamic_vertex_buffer[base_index]; + float4 B = dynamic_vertex_buffer[base_index + 1]; + + Vertex v; + v.pt = A.xyz; + v.normal = float3(A.w, B.x, B.y); + v.coordinate = float2(B.z, B.w); + return v; +} + +uint3 getIndices(uint triangle_offset, uint primitive_id) +{ + uint base_index = 3 * (triangle_offset + primitive_id); + uint index0 = dynamic_index_buffer[base_index]; + uint index1 = dynamic_index_buffer[base_index + 1]; + uint index2 = dynamic_index_buffer[base_index + 2]; + return uint3(index0, index1, index2); +} + +[shader("closesthit")] +void main(inout Payload hitValue, in Attributes attribs) +{ + const float3 barycentricCoords = float3(1.0f - attribs.bary.x - attribs.bary.y, attribs.bary.x, attribs.bary.y); + + if (render_mode == RENDER_BARYCENTRIC) + { + hitValue.color = float4(barycentricCoords, 1); + return; + } + else if (render_mode == RENDER_INSTANCE_ID) + { + hitValue.color =
float4(heatmap(InstanceID(), 0, 25), 1); + return; + } + else if (render_mode == RENDER_DISTANCE) + { + hitValue.color = float4(heatmap(log(1 + RayTCurrent()), 0, log(1 + 25)), 1); + return; + } + + uint index = InstanceID(); + + uint vertexOffset = data_map[4 * index]; + uint triangleOffset = data_map[4 * index + 1]; + const uint objectType = 1; // OBJECT_REFRACTION + + uint3 indices = getIndices(triangleOffset, PrimitiveIndex()); + Vertex A = getVertex(vertexOffset, indices.x); + Vertex B = getVertex(vertexOffset, indices.y); + Vertex C = getVertex(vertexOffset, indices.z); + + // interpolate and obtain world point + float alpha = barycentricCoords.x, beta = barycentricCoords.y, gamma = barycentricCoords.z; + float3 pt = alpha * A.pt + beta * B.pt + gamma * C.pt; + float3 worldPt = WorldRayOrigin() + RayTCurrent() * WorldRayDirection(); + float3 worldNormal = normalize(cross(B.pt - A.pt, C.pt - A.pt)); + + float2 texcoord = alpha * A.coordinate + beta * B.coordinate + gamma * C.coordinate; + + hitValue.intersection = float4(worldPt.xyz, objectType); + hitValue.normal = float4(worldNormal.xyz, RayTCurrent()); + + if (render_mode == RENDER_GLOBAL_XYZ) + { + hitValue.color = float4(heatmap(worldPt.x, -10, 10), 1); + return; + } + + // Refraction/glass material - compute IOR and Fresnel + const float base_IOR = 1.01; + const float x = texcoord.x, y = texcoord.y; + const float t = min(min(min(min(x, 1 - x), y), 1 - y), 0.5) / 0.5; + const float IOR = t * base_IOR + (1 - t) * 1; + float cosTheta = abs(dot(worldNormal, normalize(WorldRayDirection()))); + float fresnel = pow(1.0 - cosTheta, 5.0); + + hitValue.color = float4(IOR, fresnel, 0, 0); +} diff --git a/shaders/ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.spv b/shaders/ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.spv new file mode 100644 index 000000000..12e49b6d2 Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/slang/closesthit_refraction.rchit.spv differ 
diff --git a/shaders/ray_tracing_invocation_reorder/slang/miss.rmiss.slang b/shaders/ray_tracing_invocation_reorder/slang/miss.rmiss.slang
new file mode 100644
index 000000000..6c99e53c8
--- /dev/null
+++ b/shaders/ray_tracing_invocation_reorder/slang/miss.rmiss.slang
@@ -0,0 +1,47 @@
+/* Copyright (c) 2025-2026, Holochip Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Value stored in Payload::intersection.w when a ray leaves the scene without a hit.
+// The ray generation shader (raygen.rgen.slang) tests for the same value.
+#define OBJECT_TYPE_MISS 100
+
+// Ray payload. The miss shader receives the payload that raygen.rgen.slang passes to TraceRay,
+// so this layout must match the Payload struct declared there (color, intersection, normal).
+struct Payload
+{
+    float4 color;
+    float4 intersection;        // {x, y, z, intersectionType}
+    float4 normal;              // {nx, ny, nz, distance}
+};
+
+// Miss shader: fills the payload with a vertical sky gradient and marks the ray as a miss.
+[shader("miss")]
+void main(inout Payload payload)
+{
+    // Simple gradient background
+    const float3 rayDir   = WorldRayDirection();
+    const float3 skyColor = lerp(float3(0.3, 0.5, 0.8), float3(0.1, 0.2, 0.4), rayDir.y * 0.5 + 0.5);
+
+    payload.color = float4(skyColor, 1);
+
+    // Tag the payload as a miss so that the caller never reuses the object type of a previous hit.
+    payload.intersection = float4(0, 0, 0, OBJECT_TYPE_MISS);
+
+    // In a miss shader RayTCurrent() is the TMax of the ray, so shadow and ambient occlusion rays
+    // that escape the scene report "nothing closer than TMax" instead of a stale hit distance.
+    payload.normal = float4(0, 0, 0, RayTCurrent());
+}
diff --git a/shaders/ray_tracing_invocation_reorder/slang/miss.rmiss.spv b/shaders/ray_tracing_invocation_reorder/slang/miss.rmiss.spv
new file mode 100644
index 000000000..c5c4f93b2
Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/slang/miss.rmiss.spv differ
diff --git a/shaders/ray_tracing_invocation_reorder/slang/raygen.rgen.slang b/shaders/ray_tracing_invocation_reorder/slang/raygen.rgen.slang
new file mode 100644
index 000000000..806ee4078
--- /dev/null
+++ b/shaders/ray_tracing_invocation_reorder/slang/raygen.rgen.slang
@@ -0,0 +1,264 @@
+/* Copyright (c) 2025-2026, Holochip Inc.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define RENDER_DEFAULT 0
+#define RENDER_BARYCENTRIC 1
+#define RENDER_INSTANCE_ID 2
+#define RENDER_DISTANCE 3
+#define RENDER_GLOBAL_XYZ 4
+#define RENDER_SHADOW_MAP 5
+#define RENDER_AO 6
+
+// Value the miss shader (miss.rmiss.slang) stores in Payload::intersection.w.
+#define OBJECT_TYPE_MISS 100
+
+struct CameraProperties
+{
+    float4x4 viewInverse;
+    float4x4 projInverse;
+    int      enableSER;               // non-zero: trace the primary ray through a HitObject and reorder
+    int      useCoherenceHint;        // non-zero: pass the hit instance index as reorder hint
+    float    time;
+};
+
+// Payload filled in by the closest-hit and miss shaders. Every stage must declare this layout.
+struct Payload
+{
+    float4 color;
+    float4 intersection;        // {x, y, z, intersectionType}
+    float4 normal;              // {nx, ny, nz, distance}
+};
+
+RaytracingAccelerationStructure  topLevelAS;
+RWTexture2D<float4>              image;
+ConstantBuffer<CameraProperties> cam;
+
+[[vk::constant_id(0)]] const uint render_mode = RENDER_DEFAULT;
+[[vk::constant_id(1)]] const uint maxRays     = 12;
+
+// Ray generation shader.
+// Traces one camera ray per pixel and follows it through refractive surfaces and flame particles
+// until an opaque surface or the sky is reached, the accumulated alpha saturates, or the ray
+// budget (maxRays) is used up. Opaque hits are shaded with one shadow ray and 2x2 ambient
+// occlusion rays. When cam.enableSER is set, the first segment is traced into a HitObject and the
+// thread is reordered before its hit or miss shader is invoked.
+[shader("raygeneration")]
+void main()
+{
+    const uint3 launchID   = DispatchRaysIndex();
+    const uint3 launchSize = DispatchRaysDimensions();
+
+    const float2 pixelCenter = float2(launchID.xy) + float2(0.5, 0.5);
+    const float2 inUV        = pixelCenter / float2(launchSize.xy);
+    const float2 d           = inUV * 2.0 - 1.0;
+
+    float4       origin    = mul(cam.viewInverse, float4(0, 0, 0, 1));
+    const float4 target    = mul(cam.projInverse, float4(d.x, d.y, 1, 1));
+    float4       direction = mul(cam.viewInverse, float4(normalize(target.xyz), 0));
+
+    const float tmin = 0.001;
+    const float tmax = 10000.0;
+
+    // Every debug visualisation only looks at the first hit
+    const uint max_rays = (render_mode == RENDER_DEFAULT) ? maxRays : 1;
+
+    // Modes whose colour comes straight from the closest-hit or miss shader. The shadow map and
+    // ambient occlusion views are not part of this set: they are produced by the shading code below.
+    const bool hit_shader_debug_mode = render_mode != RENDER_DEFAULT &&
+                                       render_mode != RENDER_SHADOW_MAP &&
+                                       render_mode != RENDER_AO;
+
+    uint   object_type  = OBJECT_TYPE_MISS;
+    float4 color        = float4(0, 0, 0, 0);
+    uint   current_mode = 0;
+
+    Payload payload;
+    payload.color        = float4(0, 0, 0, 0);
+    payload.intersection = float4(0, 0, 0, 0);
+    payload.normal       = float4(0, 0, 0, 0);
+
+    // Loop to trace through multiple objects (flame particles, refractive surfaces)
+    // Similar to ray_tracing_extended - continues until alpha is saturated or max rays reached
+    for (uint rayIndex = 0; rayIndex < max_rays && current_mode < 100 && color.a < 0.95; ++rayIndex)
+    {
+        RayDesc ray;
+        ray.Origin    = origin.xyz;
+        ray.Direction = direction.xyz;
+        ray.TMin      = tmin;
+        ray.TMax      = tmax;
+
+        if (cam.enableSER != 0 && rayIndex == 0)
+        {
+            // Primary ray with SER: find the hit first, reorder the thread, then run the shader
+            HitObject hitObj = HitObject::TraceRay(
+                topLevelAS,
+                RAY_FLAG_NONE,
+                0xff,
+                0,
+                0,
+                0,
+                ray,
+                payload);
+
+            if (cam.useCoherenceHint != 0)
+            {
+                // Group threads by the instance they hit; threads that missed share hint 0
+                uint hint = 0;
+                if (hitObj.IsHit())
+                {
+                    hint = hitObj.GetInstanceIndex();
+                }
+                ReorderThread(hitObj, hint, 8);
+            }
+            else
+            {
+                ReorderThread(hitObj);
+            }
+
+            HitObject::Invoke(topLevelAS, hitObj, payload);
+        }
+        else
+        {
+            TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 0, 0, 0, ray, payload);
+        }
+        object_type = uint(payload.intersection.w);
+
+        // Keep a copy of the hit: the secondary rays below reuse and overwrite the payload
+        const float3 object_intersection_pt = payload.intersection.xyz;
+        const float3 object_normal          = payload.normal.xyz;
+
+        if (hit_shader_debug_mode)
+        {
+            color = payload.color;
+            break;
+        }
+        else if (object_type == 0)
+        {
+            // Opaque surface: shade it with a shadow ray and ambient occlusion, then stop tracing
+            float4 newColor = payload.color;
+            // shadow
+            {
+                const float  shadow_mult      = 2;
+                const float  shadow_scale     = 0.25;
+                const float3 lightPt          = float3(0, -20, 0);
+                const float3 toLight          = lightPt - object_intersection_pt;
+                const float  expectedDistance = length(toLight);
+
+                RayDesc shadowRay;
+                shadowRay.Origin    = object_intersection_pt;
+                shadowRay.Direction = normalize(toLight);
+                shadowRay.TMin      = tmin;
+                shadowRay.TMax      = tmax;
+
+                TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 0, 0, 0, shadowRay, payload);
+
+                // normal.w is the distance to whatever the shadow ray hit (TMax when it hit nothing)
+                const float actDistance = payload.normal.w;
+                float       scale       = actDistance < expectedDistance ? shadow_scale : 1.0;
+                scale = min(scale * shadow_mult, 1.0);
+                newColor.xyz *= scale;
+                current_mode = 101;
+                if (render_mode == RENDER_SHADOW_MAP)
+                {
+                    image[launchID.xy] = float4(scale, scale, scale, 1);
+                    return;
+                }
+            }
+            // ambient occlusion - standard TraceRay (SER overhead in loops is too high)
+            {
+                const float ao_mult            = 1;
+                uint        max_ao_each        = 2;        // 2x2=4 AO rays
+                const float max_dist           = 2;
+                float       accumulated_ao     = 0.0;
+                float3      u                  = abs(dot(object_normal, float3(0, 0, 1))) > 0.9 ? cross(object_normal, float3(1, 0, 0)) : cross(object_normal, float3(0, 0, 1));
+                float3      v                  = cross(object_normal, u);
+                float       accumulated_factor = 0;
+                for (uint j = 0; j < max_ao_each; ++j)
+                {
+                    float phi = 0.5 * (-3.14159 + 2 * 3.14159 * (float(j + 1) / float(max_ao_each + 2)));
+                    for (uint k = 0; k < max_ao_each; ++k)
+                    {
+                        float  theta = 0.5 * (-3.14159 + 2 * 3.14159 * (float(k + 1) / float(max_ao_each + 2)));
+                        float  x     = cos(phi) * sin(theta);
+                        float  y     = sin(phi) * sin(theta);
+                        float  z     = cos(theta);
+                        float3 dir2  = x * u + y * v + z * object_normal;
+
+                        RayDesc aoRay;
+                        aoRay.Origin    = object_intersection_pt;
+                        aoRay.Direction = dir2;
+                        aoRay.TMin      = tmin;
+                        aoRay.TMax      = tmax;
+
+                        // AO rays use standard TraceRay - SER overhead in loops is too high
+                        TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 0, 0, 0, aoRay, payload);
+
+                        float ao     = min(payload.normal.w, max_dist);
+                        float factor = 0.2 + 0.8 * z * z;
+                        accumulated_factor += factor;
+                        accumulated_ao += ao * factor;
+                    }
+                }
+                accumulated_ao /= (max_dist * accumulated_factor);
+                accumulated_ao *= accumulated_ao;
+                accumulated_ao = max(min((accumulated_ao) * ao_mult, 1), 0);
+                if (render_mode == RENDER_AO)
+                {
+                    image[launchID.xy] = float4(accumulated_ao, accumulated_ao, accumulated_ao, 1);
+                    return;
+                }
+
+                newColor.xyz *= accumulated_ao;
+                const float r = max(0, 1 - color.a);
+                color += r * float4(newColor.rgb, 1);
+            }
+        }
+        else if (object_type == 1)
+        {
+            // Refractive / reflective path - continue ray through the surface
+            origin        = float4(payload.intersection.xyz, 0);
+            float IOR     = payload.color.x;
+            float max_IOR = 1.01;
+            float eta     = 1 / IOR;
+            float c       = abs(dot(object_normal, direction.xyz));
+            float t       = (IOR - 1) / (max_IOR - 1);
+            direction     = float4(normalize((1 - t) * direction.xyz + t * (eta * direction.xyz + (eta * c - sqrt(1 - eta * eta * (1 - c * c))) * object_normal)), 0);
+        }
+        else if (object_type == 2)
+        {
+            // Flame particle with alpha blending - continue ray through particle
+            float4 newColor = payload.color;
+            float  r        = 1 - color.a;
+            color.rgb += r * newColor.rgb * newColor.a;
+            color.a += 0.1 * r * newColor.a;
+            origin = float4(payload.intersection.xyz, 0);
+        }
+        else
+        {
+            // Miss: put the sky colour written by the miss shader behind everything accumulated so
+            // far. Any other (unknown) object type contributes nothing. Either way, stop tracing.
+            if (object_type == OBJECT_TYPE_MISS)
+            {
+                const float r = max(0, 1 - color.a);
+                color += r * float4(payload.color.rgb, 1);
+            }
+            break;
+        }
+    }
+
+    image[launchID.xy] = color;
+}
diff --git a/shaders/ray_tracing_invocation_reorder/slang/raygen.rgen.spv b/shaders/ray_tracing_invocation_reorder/slang/raygen.rgen.spv
new file mode 100644
index 000000000..31d5dbe36
Binary files /dev/null and b/shaders/ray_tracing_invocation_reorder/slang/raygen.rgen.spv differ