Skip to content

Commit 61a1e0c

Browse files
committed
Implement per-pixel linked list for OIT
1 parent 2a24658 commit 61a1e0c

File tree

9 files changed

+295
-208
lines changed

9 files changed

+295
-208
lines changed

crates/bevy_core_pipeline/src/oit/mod.rs

Lines changed: 92 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
//! Order Independent Transparency (OIT) for 3d rendering. See [`OrderIndependentTransparencyPlugin`] for more details.
22
33
use bevy_app::prelude::*;
4-
use bevy_camera::{Camera, Camera3d};
5-
use bevy_ecs::{component::*, lifecycle::ComponentHook, prelude::*};
6-
use bevy_math::UVec2;
7-
use bevy_platform::collections::HashSet;
4+
use bevy_camera::Camera3d;
5+
use bevy_ecs::{component::*, prelude::*};
6+
use bevy_math::{UVec2, UVec3};
87
use bevy_platform::time::Instant;
98
use bevy_reflect::{std_traits::ReflectDefault, Reflect};
109
use bevy_render::{
@@ -17,12 +16,11 @@ use bevy_render::{
1716
Render, RenderApp, RenderStartup, RenderSystems,
1817
};
1918
use bevy_shader::load_shader_library;
20-
use bevy_window::PrimaryWindow;
2119
use resolve::{
2220
node::{OitResolveNode, OitResolvePass},
2321
OitResolvePlugin,
2422
};
25-
use tracing::{trace, warn};
23+
use tracing::trace;
2624

2725
use crate::core_3d::graph::{Core3d, Node3d};
2826

@@ -35,13 +33,16 @@ pub mod resolve;
3533
// depth peeling, stochastic transparency, ray tracing etc.
3634
// This should probably be done by adding an enum to this component.
3735
// We use the same struct to pass on the settings to the drawing shader.
38-
#[derive(Clone, Copy, ExtractComponent, Reflect, ShaderType)]
36+
#[derive(Clone, Copy, ExtractComponent, Reflect, ShaderType, Component)]
3937
#[reflect(Clone, Default)]
4038
pub struct OrderIndependentTransparencySettings {
41-
/// Controls how many layers will be used to compute the blending.
42-
/// The more layers you use the more memory it will use but it will also give better results.
43-
/// 8 is generally recommended, going above 32 is probably not worth it in the vast majority of cases
44-
pub layer_count: i32,
39+
/// Controls how many fragments will be exactly sorted.
40+
/// If the scene has more fragments than this, they will be merged approximately.
41+
/// Sorting more fragments is more accurate but slower.
42+
pub sorted_fragment_max_count: u32,
43+
/// The average number of fragments per pixel stored in the buffer. This should be big enough for OIT to succeed.
44+
/// Higher values increase memory usage.
45+
pub fragments_per_pixel_average: f32,
4546
/// Threshold for which fragments will be added to the blending layers.
4647
/// This can be tweaked to optimize quality / layers count. Higher values will
4748
/// allow lower number of layers and a better performance, compromising quality.
@@ -51,32 +52,13 @@ pub struct OrderIndependentTransparencySettings {
5152
impl Default for OrderIndependentTransparencySettings {
5253
fn default() -> Self {
5354
Self {
54-
layer_count: 8,
55+
sorted_fragment_max_count: 16,
56+
fragments_per_pixel_average: 8.0,
5557
alpha_threshold: 0.0,
5658
}
5759
}
5860
}
5961

60-
// OrderIndependentTransparencySettings is also a Component. We explicitly implement the trait so
61-
// we can hook on_add to issue a warning in case `layer_count` is seemingly too high.
62-
impl Component for OrderIndependentTransparencySettings {
63-
const STORAGE_TYPE: StorageType = StorageType::SparseSet;
64-
type Mutability = Mutable;
65-
66-
fn on_add() -> Option<ComponentHook> {
67-
Some(|world, context| {
68-
if let Some(value) = world.get::<OrderIndependentTransparencySettings>(context.entity)
69-
&& value.layer_count > 32
70-
{
71-
warn!("{}OrderIndependentTransparencySettings layer_count set to {} might be too high.",
72-
context.caller.map(|location|format!("{location}: ")).unwrap_or_default(),
73-
value.layer_count
74-
);
75-
}
76-
})
77-
}
78-
}
79-
8062
/// A plugin that adds support for Order Independent Transparency (OIT).
8163
/// This can correctly render some scenes that would otherwise have artifacts due to alpha blending, but uses more memory.
8264
///
@@ -88,8 +70,8 @@ impl Component for OrderIndependentTransparencySettings {
8870
/// # Implementation details
8971
/// This implementation uses 2 passes.
9072
///
91-
/// The first pass writes the depth and color of all the fragments to a big buffer.
92-
/// The buffer contains N layers for each pixel, where N can be set with [`OrderIndependentTransparencySettings::layer_count`].
73+
/// The first pass constructs a linked list which stores depth and color of all fragments in a big buffer.
74+
/// The linked list capacity can be set with [`OrderIndependentTransparencySettings::fragments_per_pixel_average`].
9375
/// This pass is essentially a forward pass.
9476
///
9577
/// The second pass is a single fullscreen triangle pass that sorts all the fragments then blends them together
@@ -103,8 +85,7 @@ impl Plugin for OrderIndependentTransparencyPlugin {
10385
ExtractComponentPlugin::<OrderIndependentTransparencySettings>::default(),
10486
OitResolvePlugin,
10587
))
106-
.add_systems(Update, check_msaa)
107-
.add_systems(Last, configure_depth_texture_usages);
88+
.add_systems(Update, check_msaa);
10889

10990
let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
11091
return;
@@ -114,7 +95,10 @@ impl Plugin for OrderIndependentTransparencyPlugin {
11495
.add_systems(RenderStartup, init_oit_buffers)
11596
.add_systems(
11697
Render,
117-
prepare_oit_buffers.in_set(RenderSystems::PrepareResources),
98+
(
99+
configure_camera_depth_usages.in_set(RenderSystems::ManageViews),
100+
prepare_oit_buffers.in_set(RenderSystems::PrepareResources),
101+
),
118102
);
119103

120104
render_app
@@ -130,34 +114,17 @@ impl Plugin for OrderIndependentTransparencyPlugin {
130114
}
131115
}
132116

133-
// WARN This should only happen for cameras with the [`OrderIndependentTransparencySettings`] component
134-
// but when multiple cameras are present on the same window
135-
// bevy reuses the same depth texture so we need to set this on all cameras with the same render target.
136-
fn configure_depth_texture_usages(
137-
p: Query<Entity, With<PrimaryWindow>>,
138-
cameras: Query<(&Camera, Has<OrderIndependentTransparencySettings>)>,
139-
mut new_cameras: Query<(&mut Camera3d, &Camera), Added<Camera3d>>,
117+
fn configure_camera_depth_usages(
118+
mut cameras: Query<
119+
&mut Camera3d,
120+
(
121+
Changed<Camera3d>,
122+
With<OrderIndependentTransparencySettings>,
123+
),
124+
>,
140125
) {
141-
if new_cameras.is_empty() {
142-
return;
143-
}
144-
145-
// Find all the render target that potentially uses OIT
146-
let primary_window = p.single().ok();
147-
let mut render_target_has_oit = <HashSet<_>>::default();
148-
for (camera, has_oit) in &cameras {
149-
if has_oit {
150-
render_target_has_oit.insert(camera.target.normalize(primary_window));
151-
}
152-
}
153-
154-
// Update the depth texture usage for cameras with a render target that has OIT
155-
for (mut camera_3d, camera) in &mut new_cameras {
156-
if render_target_has_oit.contains(&camera.target.normalize(primary_window)) {
157-
let mut usages = TextureUsages::from(camera_3d.depth_texture_usages);
158-
usages |= TextureUsages::RENDER_ATTACHMENT | TextureUsages::TEXTURE_BINDING;
159-
camera_3d.depth_texture_usages = usages.into();
160-
}
126+
for mut camera in &mut cameras {
127+
camera.depth_texture_usages.0 |= TextureUsages::TEXTURE_BINDING.bits();
161128
}
162129
}
163130

@@ -169,18 +136,35 @@ fn check_msaa(cameras: Query<&Msaa, With<OrderIndependentTransparencySettings>>)
169136
}
170137
}
171138

139+
#[derive(Clone, Copy, ShaderType)]
140+
pub struct OitFragmentNode {
141+
color: u32,
142+
depth_alpha: u32,
143+
next: u32,
144+
}
145+
146+
impl Default for OitFragmentNode {
147+
fn default() -> Self {
148+
Self {
149+
color: 0,
150+
depth_alpha: 0,
151+
next: u32::MAX,
152+
}
153+
}
154+
}
155+
172156
/// Holds the buffers that contain the data of all OIT layers.
173157
/// We use one big buffer for the entire app. Each camera will reuse it so it will
174158
/// always be the size of the biggest OIT enabled camera.
175159
#[derive(Resource)]
176160
pub struct OitBuffers {
177-
/// The OIT layers containing depth and color for each fragments.
161+
pub settings: DynamicUniformBuffer<OrderIndependentTransparencySettings>,
162+
/// The OIT nodes containing color/depth/next_node for each fragment.
178163
/// This is essentially used as a 3d array where xy is the screen coordinate and z is
179164
/// the list of fragments rendered with OIT.
180-
pub layers: BufferVec<UVec2>,
181-
/// Buffer containing the index of the last layer that was written for each fragment.
182-
pub layer_ids: BufferVec<i32>,
183-
pub settings: DynamicUniformBuffer<OrderIndependentTransparencySettings>,
165+
pub nodes: BufferVec<OitFragmentNode>,
166+
pub headers: BufferVec<u32>,
167+
pub atomic_counter: BufferVec<u32>,
184168
}
185169

186170
pub fn init_oit_buffers(
@@ -190,22 +174,28 @@ pub fn init_oit_buffers(
190174
) {
191175
// initialize buffers with something so there's a valid binding
192176

193-
let mut layers = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
194-
layers.set_label(Some("oit_layers"));
195-
layers.reserve(1, &render_device);
196-
layers.write_buffer(&render_device, &render_queue);
177+
let mut nodes = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
178+
nodes.set_label(Some("oit_nodes"));
179+
nodes.push(OitFragmentNode::default());
180+
nodes.write_buffer(&render_device, &render_queue);
181+
182+
let mut headers = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
183+
headers.set_label(Some("oit_headers"));
184+
headers.push(u32::MAX);
185+
headers.write_buffer(&render_device, &render_queue);
197186

198-
let mut layer_ids = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
199-
layer_ids.set_label(Some("oit_layer_ids"));
200-
layer_ids.reserve(1, &render_device);
201-
layer_ids.write_buffer(&render_device, &render_queue);
187+
let mut atomic_counter = BufferVec::new(BufferUsages::COPY_DST | BufferUsages::STORAGE);
188+
atomic_counter.set_label(Some("oit_atomic_counter"));
189+
atomic_counter.push(0);
190+
atomic_counter.write_buffer(&render_device, &render_queue);
202191

203192
let mut settings = DynamicUniformBuffer::default();
204193
settings.set_label(Some("oit_settings"));
205194

206195
commands.insert_resource(OitBuffers {
207-
layers,
208-
layer_ids,
196+
nodes,
197+
headers,
198+
atomic_counter,
209199
settings,
210200
});
211201
}
@@ -233,52 +223,46 @@ pub fn prepare_oit_buffers(
233223
mut buffers: ResMut<OitBuffers>,
234224
) {
235225
// Get the max buffer size for any OIT enabled camera
236-
let mut max_layer_ids_size = usize::MIN;
237-
let mut max_layers_size = usize::MIN;
226+
let mut max_size = UVec2::new(0, 0);
227+
let mut fragments_per_pixel_average = 0f32;
238228
for (camera, settings) in &cameras {
239229
let Some(size) = camera.physical_target_size else {
240230
continue;
241231
};
242-
243-
let layer_count = settings.layer_count as usize;
244-
let size = (size.x * size.y) as usize;
245-
max_layer_ids_size = max_layer_ids_size.max(size);
246-
max_layers_size = max_layers_size.max(size * layer_count);
232+
max_size = max_size.max(size);
233+
fragments_per_pixel_average =
234+
fragments_per_pixel_average.max(settings.fragments_per_pixel_average);
247235
}
248236

249-
// Create or update the layers buffer based on the max size
250-
if buffers.layers.capacity() < max_layers_size {
237+
// Create or update the headers buffer based on the max size
238+
let headers_size = (max_size.x * max_size.y) as usize;
239+
if buffers.headers.capacity() < headers_size {
251240
let start = Instant::now();
252-
buffers.layers.reserve(max_layers_size, &render_device);
253-
let remaining = max_layers_size - buffers.layers.capacity();
254-
for _ in 0..remaining {
255-
buffers.layers.push(UVec2::ZERO);
241+
buffers.headers.clear();
242+
for _ in 0..headers_size {
243+
buffers.headers.push(u32::MAX);
256244
}
257-
buffers.layers.write_buffer(&render_device, &render_queue);
245+
buffers.headers.write_buffer(&render_device, &render_queue);
258246
trace!(
259-
"OIT layers buffer updated in {:.01}ms with total size {} MiB",
247+
"OIT headers texture updated in {:.01}ms with total size {} MiB",
260248
start.elapsed().as_millis(),
261-
buffers.layers.capacity() * size_of::<UVec2>() / 1024 / 1024,
249+
buffers.headers.capacity() * size_of::<u32>() / 1024 / 1024,
262250
);
263251
}
264252

265-
// Create or update the layer_ids buffer based on the max size
266-
if buffers.layer_ids.capacity() < max_layer_ids_size {
253+
// Create or update the nodes buffer based on the max size
254+
let nodes_size = ((max_size.x * max_size.y) as f32 * fragments_per_pixel_average) as usize;
255+
if buffers.nodes.capacity() < nodes_size {
267256
let start = Instant::now();
268-
buffers
269-
.layer_ids
270-
.reserve(max_layer_ids_size, &render_device);
271-
let remaining = max_layer_ids_size - buffers.layer_ids.capacity();
272-
for _ in 0..remaining {
273-
buffers.layer_ids.push(0);
257+
buffers.nodes.clear();
258+
for _ in 0..nodes_size {
259+
buffers.nodes.push(OitFragmentNode::default());
274260
}
275-
buffers
276-
.layer_ids
277-
.write_buffer(&render_device, &render_queue);
261+
buffers.nodes.write_buffer(&render_device, &render_queue);
278262
trace!(
279-
"OIT layer ids buffer updated in {:.01}ms with total size {} MiB",
263+
"OIT nodes buffer updated in {:.01}ms with total size {} MiB",
280264
start.elapsed().as_millis(),
281-
buffers.layer_ids.capacity() * size_of::<UVec2>() / 1024 / 1024,
265+
buffers.nodes.capacity() * size_of::<UVec3>() / 1024 / 1024,
282266
);
283267
}
284268

crates/bevy_core_pipeline/src/oit/oit_draw.wgsl

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#define_import_path bevy_core_pipeline::oit
22

3-
#import bevy_pbr::mesh_view_bindings::{view, oit_layers, oit_layer_ids, oit_settings}
3+
#import bevy_pbr::mesh_view_bindings::{view, oit_nodes, oit_headers, oit_atomic_counter, oit_settings}
4+
#import bevy_pbr::mesh_view_types::OitFragmentNode
45

56
#ifdef OIT_ENABLED
67
// Add the fragment to the oit buffer
@@ -11,27 +12,22 @@ fn oit_draw(position: vec4f, color: vec4f) {
1112
return;
1213
}
1314
// get the index of the current fragment relative to the screen size
14-
let screen_index = i32(floor(position.x) + floor(position.y) * view.viewport.z);
15+
let screen_index = u32(floor(position.x) + floor(position.y) * view.viewport.z);
1516
// get the number of fragment nodes available to this view.
1617
// It's the viewport pixel count multiplied by `fragments_per_pixel_average`
17-
let buffer_size = i32(view.viewport.z * view.viewport.w);
18+
let buffer_size = u32(view.viewport.z * view.viewport.w * oit_settings.fragments_per_pixel_average);
1819

19-
// gets the layer index of the current fragment
20-
var layer_id = atomicAdd(&oit_layer_ids[screen_index], 1);
21-
// exit early if we've reached the maximum amount of fragments per layer
22-
if layer_id >= oit_settings.layers_count {
23-
// force to store the oit_layers_count to make sure we don't
24-
// accidentally increase the index above the maximum value
25-
atomicStore(&oit_layer_ids[screen_index], oit_settings.layers_count);
26-
// TODO for tail blending we should return the color here
20+
var new_node_index = atomicAdd(&oit_atomic_counter, 1);
21+
// exit early if we've reached the maximum number of fragment nodes
22+
if new_node_index >= buffer_size {
2723
return;
2824
}
2925

30-
// get the layer_index from the screen
31-
let layer_index = screen_index + layer_id * buffer_size;
32-
let rgb9e5_color = bevy_pbr::rgb9e5::vec3_to_rgb9e5_(color.rgb);
33-
let depth_alpha = pack_24bit_depth_8bit_alpha(position.z, color.a);
34-
oit_layers[layer_index] = vec2(rgb9e5_color, depth_alpha);
26+
var node: OitFragmentNode;
27+
node.next = atomicExchange(&oit_headers[screen_index], new_node_index);
28+
node.color = bevy_pbr::rgb9e5::vec3_to_rgb9e5_(color.rgb);
29+
node.depth_alpha = pack_24bit_depth_8bit_alpha(position.z, color.a);
30+
oit_nodes[new_node_index] = node;
3531
}
3632
#endif // OIT_ENABLED
3733

0 commit comments

Comments
 (0)