Skip to content

Commit dfd10f1

Browse files
authored
Optimize solari initial and temporal DI (#20156)
# Objective

Optimize the initial + temporal ReSTIR DI pass. In the example Cornell box scene, it goes from 2.37 ms to 1.97 ms. On a more complex scene, the Lumberyard bistro, with many emissive lights, it goes from 130+ ms to about 80.8 ms at 1440p on a 4070. I also noticed that the triangle area calculation didn't take object scale into account, and fixed that.

## Solution

- Switch to textures instead of buffers for reservoir storage.
- There are also a bunch of other micro-optimizations to increase SM occupancy and reduce memory pressure.

## Testing

- This was tested on the Cornell box example scene and the bistro scene.
- Everything was tested on Windows 11 with a 4070 running at 1440p.
- Testing on other platforms and GPUs can be done by running the example.

---

## Showcase

<img width="2560" height="1392" alt="image" src="https://github.com/user-attachments/assets/b53f66e8-c97d-4f94-b30f-bbe6ade1a507" />
1 parent 6eca318 commit dfd10f1

File tree

5 files changed

+97
-60
lines changed

5 files changed

+97
-60
lines changed

crates/bevy_solari/src/realtime/node.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use bevy_render::{
2222
},
2323
BindGroupEntries, BindGroupLayout, BindGroupLayoutEntries, CachedComputePipelineId,
2424
ComputePassDescriptor, ComputePipelineDescriptor, PipelineCache, PushConstantRange,
25-
ShaderStages, StorageTextureAccess, TextureSampleType,
25+
ShaderStages, StorageTextureAccess, TextureFormat, TextureSampleType,
2626
},
2727
renderer::{RenderContext, RenderDevice},
2828
view::{ViewTarget, ViewUniform, ViewUniformOffset, ViewUniforms},
@@ -132,8 +132,8 @@ impl ViewNode for SolariLightingNode {
132132
view_target.get_unsampled_color_attachment().view,
133133
s.light_tile_samples.as_entire_binding(),
134134
s.light_tile_resolved_samples.as_entire_binding(),
135-
s.di_reservoirs_a.as_entire_binding(),
136-
s.di_reservoirs_b.as_entire_binding(),
135+
&s.di_reservoirs_a.1,
136+
&s.di_reservoirs_b.1,
137137
s.gi_reservoirs_a.as_entire_binding(),
138138
s.gi_reservoirs_b.as_entire_binding(),
139139
gbuffer,
@@ -300,8 +300,8 @@ impl FromWorld for SolariLightingNode {
300300
),
301301
storage_buffer_sized(false, None),
302302
storage_buffer_sized(false, None),
303-
storage_buffer_sized(false, None),
304-
storage_buffer_sized(false, None),
303+
texture_storage_2d(TextureFormat::Rgba32Uint, StorageTextureAccess::ReadWrite),
304+
texture_storage_2d(TextureFormat::Rgba32Uint, StorageTextureAccess::ReadWrite),
305305
storage_buffer_sized(false, None),
306306
storage_buffer_sized(false, None),
307307
texture_2d(TextureSampleType::Uint),

crates/bevy_solari/src/realtime/prepare.rs

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use bevy_render::{
1313
camera::ExtractedCamera,
1414
render_resource::{
1515
Buffer, BufferDescriptor, BufferUsages, Texture, TextureDescriptor, TextureDimension,
16-
TextureUsages, TextureView, TextureViewDescriptor,
16+
TextureFormat, TextureUsages, TextureView, TextureViewDescriptor,
1717
},
1818
renderer::RenderDevice,
1919
};
@@ -24,9 +24,6 @@ const LIGHT_SAMPLE_STRUCT_SIZE: u64 = 8;
2424
/// Size of the `ResolvedLightSamplePacked` shader struct in bytes.
2525
const RESOLVED_LIGHT_SAMPLE_STRUCT_SIZE: u64 = 24;
2626

27-
/// Size of the DI `Reservoir` shader struct in bytes.
28-
const DI_RESERVOIR_STRUCT_SIZE: u64 = 16;
29-
3027
/// Size of the GI `Reservoir` shader struct in bytes.
3128
const GI_RESERVOIR_STRUCT_SIZE: u64 = 48;
3229

@@ -41,8 +38,8 @@ pub const WORLD_CACHE_SIZE: u64 = 2u64.pow(20);
4138
pub struct SolariLightingResources {
4239
pub light_tile_samples: Buffer,
4340
pub light_tile_resolved_samples: Buffer,
44-
pub di_reservoirs_a: Buffer,
45-
pub di_reservoirs_b: Buffer,
41+
pub di_reservoirs_a: (Texture, TextureView),
42+
pub di_reservoirs_b: (Texture, TextureView),
4643
pub gi_reservoirs_a: Buffer,
4744
pub gi_reservoirs_b: Buffer,
4845
pub previous_gbuffer: (Texture, TextureView),
@@ -101,33 +98,33 @@ pub fn prepare_solari_lighting_resources(
10198
mapped_at_creation: false,
10299
});
103100

104-
let di_reservoirs_a = render_device.create_buffer(&BufferDescriptor {
105-
label: Some("solari_lighting_di_reservoirs_a"),
106-
size: (view_size.x * view_size.y) as u64 * DI_RESERVOIR_STRUCT_SIZE,
107-
usage: BufferUsages::STORAGE,
108-
mapped_at_creation: false,
109-
});
110-
111-
let di_reservoirs_b = render_device.create_buffer(&BufferDescriptor {
112-
label: Some("solari_lighting_di_reservoirs_b"),
113-
size: (view_size.x * view_size.y) as u64 * DI_RESERVOIR_STRUCT_SIZE,
114-
usage: BufferUsages::STORAGE,
115-
mapped_at_creation: false,
116-
});
117-
118-
let gi_reservoirs_a = render_device.create_buffer(&BufferDescriptor {
119-
label: Some("solari_lighting_gi_reservoirs_a"),
120-
size: (view_size.x * view_size.y) as u64 * GI_RESERVOIR_STRUCT_SIZE,
121-
usage: BufferUsages::STORAGE,
122-
mapped_at_creation: false,
123-
});
101+
let di_reservoirs = |name| {
102+
let tex = render_device.create_texture(&TextureDescriptor {
103+
label: Some(name),
104+
size: view_size.to_extents(),
105+
mip_level_count: 1,
106+
sample_count: 1,
107+
dimension: TextureDimension::D2,
108+
format: TextureFormat::Rgba32Uint,
109+
usage: TextureUsages::STORAGE_BINDING,
110+
view_formats: &[],
111+
});
112+
let view = tex.create_view(&TextureViewDescriptor::default());
113+
(tex, view)
114+
};
115+
let di_reservoirs_a = di_reservoirs("solari_lighting_di_reservoirs_a");
116+
let di_reservoirs_b = di_reservoirs("solari_lighting_di_reservoirs_b");
124117

125-
let gi_reservoirs_b = render_device.create_buffer(&BufferDescriptor {
126-
label: Some("solari_lighting_gi_reservoirs_b"),
127-
size: (view_size.x * view_size.y) as u64 * GI_RESERVOIR_STRUCT_SIZE,
128-
usage: BufferUsages::STORAGE,
129-
mapped_at_creation: false,
130-
});
118+
let gi_reservoirs = |name| {
119+
render_device.create_buffer(&BufferDescriptor {
120+
label: Some(name),
121+
size: (view_size.x * view_size.y) as u64 * GI_RESERVOIR_STRUCT_SIZE,
122+
usage: BufferUsages::STORAGE,
123+
mapped_at_creation: false,
124+
})
125+
};
126+
let gi_reservoirs_a = gi_reservoirs("solari_lighting_gi_reservoirs_a");
127+
let gi_reservoirs_b = gi_reservoirs("solari_lighting_gi_reservoirs_b");
131128

132129
let previous_gbuffer = render_device.create_texture(&TextureDescriptor {
133130
label: Some("solari_lighting_previous_gbuffer"),

crates/bevy_solari/src/realtime/restir_di.wgsl

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
@group(1) @binding(0) var view_output: texture_storage_2d<rgba16float, read_write>;
1616
@group(1) @binding(1) var<storage, read_write> light_tile_samples: array<LightSample>;
1717
@group(1) @binding(2) var<storage, read_write> light_tile_resolved_samples: array<ResolvedLightSamplePacked>;
18-
@group(1) @binding(3) var<storage, read_write> di_reservoirs_a: array<Reservoir>;
19-
@group(1) @binding(4) var<storage, read_write> di_reservoirs_b: array<Reservoir>;
18+
@group(1) @binding(3) var di_reservoirs_a: texture_storage_2d<rgba32uint, read_write>;
19+
@group(1) @binding(4) var di_reservoirs_b: texture_storage_2d<rgba32uint, read_write>;
2020
@group(1) @binding(7) var gbuffer: texture_2d<u32>;
2121
@group(1) @binding(8) var depth_buffer: texture_depth_2d;
2222
@group(1) @binding(9) var motion_vectors: texture_2d<f32>;
@@ -42,7 +42,7 @@ fn initial_and_temporal(@builtin(workgroup_id) workgroup_id: vec3<u32>, @builtin
4242

4343
let depth = textureLoad(depth_buffer, global_id.xy, 0);
4444
if depth == 0.0 {
45-
di_reservoirs_b[pixel_index] = empty_reservoir();
45+
store_reservoir_b(global_id.xy, empty_reservoir());
4646
return;
4747
}
4848
let gpixel = textureLoad(gbuffer, global_id.xy, 0);
@@ -55,7 +55,7 @@ fn initial_and_temporal(@builtin(workgroup_id) workgroup_id: vec3<u32>, @builtin
5555
let temporal_reservoir = load_temporal_reservoir(global_id.xy, depth, world_position, world_normal);
5656
let merge_result = merge_reservoirs(initial_reservoir, temporal_reservoir, world_position, world_normal, diffuse_brdf, &rng);
5757

58-
di_reservoirs_b[pixel_index] = merge_result.merged_reservoir;
58+
store_reservoir_b(global_id.xy, merge_result.merged_reservoir);
5959
}
6060

6161
@compute @workgroup_size(8, 8, 1)
@@ -67,7 +67,7 @@ fn spatial_and_shade(@builtin(global_invocation_id) global_id: vec3<u32>) {
6767

6868
let depth = textureLoad(depth_buffer, global_id.xy, 0);
6969
if depth == 0.0 {
70-
di_reservoirs_a[pixel_index] = empty_reservoir();
70+
store_reservoir_a(global_id.xy, empty_reservoir());
7171
textureStore(view_output, global_id.xy, vec4(vec3(0.0), 1.0));
7272
return;
7373
}
@@ -78,12 +78,12 @@ fn spatial_and_shade(@builtin(global_invocation_id) global_id: vec3<u32>) {
7878
let diffuse_brdf = base_color / PI;
7979
let emissive = rgb9e5_to_vec3_(gpixel.g);
8080

81-
let input_reservoir = di_reservoirs_b[pixel_index];
81+
let input_reservoir = load_reservoir_b(global_id.xy);
8282
let spatial_reservoir = load_spatial_reservoir(global_id.xy, depth, world_position, world_normal, &rng);
8383
let merge_result = merge_reservoirs(input_reservoir, spatial_reservoir, world_position, world_normal, diffuse_brdf, &rng);
8484
let combined_reservoir = merge_result.merged_reservoir;
8585

86-
di_reservoirs_a[pixel_index] = combined_reservoir;
86+
store_reservoir_a(global_id.xy, combined_reservoir);
8787

8888
var pixel_color = merge_result.selected_sample_radiance * combined_reservoir.unbiased_contribution_weight;
8989
pixel_color *= view.exposure;
@@ -155,8 +155,7 @@ fn load_temporal_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3
155155
return empty_reservoir();
156156
}
157157

158-
let temporal_pixel_index = temporal_pixel_id.x + temporal_pixel_id.y * u32(view.main_pass_viewport.z);
159-
var temporal_reservoir = di_reservoirs_a[temporal_pixel_index];
158+
var temporal_reservoir = load_reservoir_a(temporal_pixel_id);
160159

161160
// Check if the light selected in the previous frame no longer exists in the current frame (e.g. entity despawned)
162161
let previous_light_id = temporal_reservoir.sample.light_id >> 16u;
@@ -183,8 +182,7 @@ fn load_spatial_reservoir(pixel_id: vec2<u32>, depth: f32, world_position: vec3<
183182
return empty_reservoir();
184183
}
185184

186-
let spatial_pixel_index = spatial_pixel_id.x + spatial_pixel_id.y * u32(view.main_pass_viewport.z);
187-
var spatial_reservoir = di_reservoirs_b[spatial_pixel_index];
185+
var spatial_reservoir = load_reservoir_b(spatial_pixel_id);
188186

189187
if reservoir_valid(spatial_reservoir) {
190188
let resolved_light_sample = resolve_light_sample(spatial_reservoir.sample, light_sources[spatial_reservoir.sample.light_id >> 16u]);
@@ -234,7 +232,6 @@ fn depth_ndc_to_view_z(ndc_depth: f32) -> f32 {
234232
#endif
235233
}
236234

237-
// Don't adjust the size of this struct without also adjusting DI_RESERVOIR_STRUCT_SIZE.
238235
struct Reservoir {
239236
sample: LightSample,
240237
confidence_weight: f32,
@@ -253,6 +250,32 @@ fn reservoir_valid(reservoir: Reservoir) -> bool {
253250
return reservoir.sample.light_id != NULL_RESERVOIR_SAMPLE;
254251
}
255252

253+
fn pack_reservoir(reservoir: Reservoir) -> vec4<u32> {
254+
let weights = bitcast<vec2<u32>>(vec2<f32>(reservoir.confidence_weight, reservoir.unbiased_contribution_weight));
255+
return vec4<u32>(reservoir.sample.light_id, reservoir.sample.seed, weights);
256+
}
257+
258+
fn store_reservoir_a(pixel: vec2<u32>, reservoir: Reservoir) {
259+
textureStore(di_reservoirs_a, pixel, pack_reservoir(reservoir));
260+
}
261+
262+
fn store_reservoir_b(pixel: vec2<u32>, reservoir: Reservoir) {
263+
textureStore(di_reservoirs_b, pixel, pack_reservoir(reservoir));
264+
}
265+
266+
fn unpack_reservoir(packed: vec4<u32>) -> Reservoir {
267+
let weights = bitcast<vec2<f32>>(packed.zw);
268+
return Reservoir(LightSample(packed.x, packed.y), weights.x, weights.y);
269+
}
270+
271+
fn load_reservoir_a(pixel: vec2<u32>) -> Reservoir {
272+
return unpack_reservoir(textureLoad(di_reservoirs_a, pixel));
273+
}
274+
275+
fn load_reservoir_b(pixel: vec2<u32>) -> Reservoir {
276+
return unpack_reservoir(textureLoad(di_reservoirs_b, pixel));
277+
}
278+
256279
struct ReservoirMergeResult {
257280
merged_reservoir: Reservoir,
258281
selected_sample_radiance: vec3<f32>,

crates/bevy_solari/src/scene/raytracing_scene_bindings.wgsl

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -152,21 +152,38 @@ fn resolve_ray_hit_full(ray_hit: RayIntersection) -> ResolvedRayHitFull {
152152
return resolve_triangle_data_full(ray_hit.instance_index, ray_hit.primitive_index, barycentrics);
153153
}
154154

155-
fn resolve_triangle_data_full(instance_id: u32, triangle_id: u32, barycentrics: vec3<f32>) -> ResolvedRayHitFull {
156-
let instance_geometry_ids = geometry_ids[instance_id];
157-
let material_id = material_ids[instance_id];
158-
155+
fn load_vertices(instance_geometry_ids: InstanceGeometryIds, triangle_id: u32) -> array<Vertex, 3> {
159156
let index_buffer = &index_buffers[instance_geometry_ids.index_buffer_id].indices;
160157
let vertex_buffer = &vertex_buffers[instance_geometry_ids.vertex_buffer_id].vertices;
161-
let material = materials[material_id];
162158

163159
let indices_i = (triangle_id * 3u) + vec3(0u, 1u, 2u) + instance_geometry_ids.index_buffer_offset;
164160
let indices = vec3((*index_buffer)[indices_i.x], (*index_buffer)[indices_i.y], (*index_buffer)[indices_i.z]) + instance_geometry_ids.vertex_buffer_offset;
165-
let vertices = array<Vertex, 3>(unpack_vertex((*vertex_buffer)[indices.x]), unpack_vertex((*vertex_buffer)[indices.y]), unpack_vertex((*vertex_buffer)[indices.z]));
166161

162+
return array<Vertex, 3>(
163+
unpack_vertex((*vertex_buffer)[indices.x]),
164+
unpack_vertex((*vertex_buffer)[indices.y]),
165+
unpack_vertex((*vertex_buffer)[indices.z])
166+
);
167+
}
168+
169+
fn transform_positions(transform: mat4x4<f32>, vertices: array<Vertex, 3>) -> array<vec3<f32>, 3> {
170+
return array<vec3<f32>, 3>(
171+
(transform * vec4(vertices[0].position, 1.0)).xyz,
172+
(transform * vec4(vertices[1].position, 1.0)).xyz,
173+
(transform * vec4(vertices[2].position, 1.0)).xyz
174+
);
175+
}
176+
177+
fn resolve_triangle_data_full(instance_id: u32, triangle_id: u32, barycentrics: vec3<f32>) -> ResolvedRayHitFull {
178+
let material_id = material_ids[instance_id];
179+
let material = materials[material_id];
180+
181+
let instance_geometry_ids = geometry_ids[instance_id];
182+
let vertices = load_vertices(instance_geometry_ids, triangle_id);
167183
let transform = transforms[instance_id];
168-
let local_position = mat3x3(vertices[0].position, vertices[1].position, vertices[2].position) * barycentrics;
169-
let world_position = (transform * vec4(local_position, 1.0)).xyz;
184+
let world_vertices = transform_positions(transform, vertices);
185+
186+
let world_position = mat3x3(world_vertices[0], world_vertices[1], world_vertices[2]) * barycentrics;
170187

171188
let uv = mat3x2(vertices[0].uv, vertices[1].uv, vertices[2].uv) * barycentrics;
172189

@@ -188,8 +205,8 @@ fn resolve_triangle_data_full(instance_id: u32, triangle_id: u32, barycentrics:
188205
world_normal = normalize(Nt.x * T + Nt.y * B + Nt.z * N);
189206
}
190207

191-
let triangle_edge0 = vertices[0].position - vertices[1].position;
192-
let triangle_edge1 = vertices[0].position - vertices[2].position;
208+
let triangle_edge0 = world_vertices[0] - world_vertices[1];
209+
let triangle_edge1 = world_vertices[0] - world_vertices[2];
193210
let triangle_area = length(cross(triangle_edge0, triangle_edge1)) / 2.0;
194211

195212
let resolved_material = resolve_material(material, uv);

release-content/release-notes/bevy_solari.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22
title: Initial raytraced lighting progress (bevy_solari)
33
authors: ["@JMS55", "@SparkyPotato"]
4-
pull_requests: [19058, 19620, 19790, 20020, 20113, 20213, 20242, 20259, 20406]
4+
pull_requests: [19058, 19620, 19790, 20020, 20113, 20156, 20213, 20242, 20259, 20406]
55
---
66

77
(TODO: Embed solari example screenshot here)

0 commit comments

Comments
 (0)