Skip to content

Commit 01d128d

Browse files
committed
Improve bloom add option to use faster but lower quality bloom
1 parent 2146899 commit 01d128d

File tree

9 files changed

+125
-18
lines changed

9 files changed

+125
-18
lines changed

crates/bevy_post_process/src/bloom/bloom.wgsl

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,43 @@ fn karis_average(color: vec3<f32>) -> f32 {
4949
return 1.0 / (1.0 + luma);
5050
}
5151

52+
// https://www.shadertoy.com/view/mdsyDf
53+
#ifdef FAST_BLUR
54+
fn bloom_down_kernel4(uv: vec2<f32>) -> vec3<f32> {
55+
let ps = uniforms.scale / vec2<f32>(textureDimensions(input_texture));
56+
let o = 0.5 + 1.0 / 4.0;
57+
58+
let a = textureSample(input_texture, s, uv + vec2<f32>(-1.0, -1.0) * o * ps).rgb * 0.25;
59+
let b = textureSample(input_texture, s, uv + vec2<f32>(1.0, -1.0) * o * ps).rgb * 0.25;
60+
let c = textureSample(input_texture, s, uv + vec2<f32>(-1.0, 1.0) * o * ps).rgb * 0.25;
61+
let d = textureSample(input_texture, s, uv + vec2<f32>(1.0, 1.0) * o * ps).rgb * 0.25;
62+
63+
#ifdef FIRST_DOWNSAMPLE
64+
return (a + b + c + d) * karis_average(a + b + c + d);
65+
#else
66+
return a + b + c + d;
67+
#endif
68+
}
69+
70+
fn bloom_up_kernel4(uv: vec2<f32>) -> vec3<f32> {
71+
// Modified version of BloomUpKernel4B https://www.shadertoy.com/view/mdsyDf. I couldn't get a good result with the original version.
72+
let ps = uniforms.scale / vec2<f32>(textureDimensions(input_texture));
73+
let w = vec4<f32>(0.211029, 0.288971, 0.288971, 0.211029);
74+
// Add a small offset for better radial symmetry.
75+
let l00 = vec2<f32>(0.347209, 0.526425) + 0.1;
76+
let l10 = vec2<f32>(0.109840, 0.334045) + 0.1;
77+
let l01 = vec2<f32>(0.334045, 0.109840) + 0.1;
78+
let l11 = vec2<f32>(0.526425, 0.347209) + 0.1;
79+
80+
let a = textureSample(input_texture, s, uv + (vec2<f32>( -0.5, -1.5) + l00) * ps).rgb * w.x;
81+
let b = textureSample(input_texture, s, uv + (vec2<f32>(0.5, -0.5) + l10) * ps).rgb * w.y;
82+
let c = textureSample(input_texture, s, uv + (vec2<f32>( -0.5, 0.5) + l01) * ps).rgb * w.z;
83+
let d = textureSample(input_texture, s, uv + (vec2<f32>( -1.5,-0.5) + l11) * ps).rgb * w.w;
84+
85+
return a + b + c + d;
86+
}
87+
#endif
88+
5289
// [COD] slide 153
5390
fn sample_input_13_tap(uv: vec2<f32>) -> vec3<f32> {
5491
#ifdef UNIFORM_SCALE
@@ -161,7 +198,11 @@ fn sample_input_3x3_tent(uv: vec2<f32>) -> vec3<f32> {
161198
@fragment
162199
fn downsample_first(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32> {
163200
let sample_uv = uniforms.viewport.xy + output_uv * uniforms.viewport.zw;
201+
#ifdef FAST_BLUR
202+
var sample = bloom_down_kernel4(sample_uv);
203+
#else
164204
var sample = sample_input_13_tap(sample_uv);
205+
#endif
165206
// Lower bound of 0.0001 is to avoid propagating multiplying by 0.0 through the
166207
// downscaling and upscaling which would result in black boxes.
167208
// The upper bound is to prevent NaNs.
@@ -178,10 +219,18 @@ fn downsample_first(@location(0) output_uv: vec2<f32>) -> @location(0) vec4<f32>
178219

179220
@fragment
180221
fn downsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
222+
#ifdef FAST_BLUR
223+
return vec4<f32>(bloom_down_kernel4(uv), 1.0);
224+
#else
181225
return vec4<f32>(sample_input_13_tap(uv), 1.0);
226+
#endif
182227
}
183228

184229
@fragment
185230
fn upsample(@location(0) uv: vec2<f32>) -> @location(0) vec4<f32> {
231+
#ifdef FAST_BLUR
232+
return vec4<f32>(bloom_up_kernel4(uv), 1.0);
233+
#else
186234
return vec4<f32>(sample_input_3x3_tent(uv), 1.0);
235+
#endif
187236
}

crates/bevy_post_process/src/bloom/downsampling_pipeline.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ pub struct BloomDownsamplingPipelineKeys {
4040
prefilter: bool,
4141
first_downsample: bool,
4242
uniform_scale: bool,
43+
high_quality: bool,
4344
}
4445

4546
/// The uniform struct extracted from [`Bloom`] attached to a Camera.
@@ -79,8 +80,9 @@ pub fn init_bloom_downsampling_pipeline(
7980
let sampler = render_device.create_sampler(&SamplerDescriptor {
8081
min_filter: FilterMode::Linear,
8182
mag_filter: FilterMode::Linear,
82-
address_mode_u: AddressMode::ClampToEdge,
83-
address_mode_v: AddressMode::ClampToEdge,
83+
address_mode_u: AddressMode::ClampToBorder,
84+
address_mode_v: AddressMode::ClampToBorder,
85+
border_color: Some(SamplerBorderColor::TransparentBlack),
8486
..Default::default()
8587
});
8688

@@ -110,6 +112,10 @@ impl SpecializedRenderPipeline for BloomDownsamplingPipeline {
110112
shader_defs.push("FIRST_DOWNSAMPLE".into());
111113
}
112114

115+
if !key.high_quality {
116+
shader_defs.push("FAST_BLUR".into());
117+
}
118+
113119
if key.prefilter {
114120
shader_defs.push("USE_THRESHOLD".into());
115121
}
@@ -161,6 +167,7 @@ pub fn prepare_downsampling_pipeline(
161167
prefilter,
162168
first_downsample: false,
163169
uniform_scale: bloom.scale == Vec2::ONE,
170+
high_quality: bloom.high_quality,
164171
},
165172
);
166173

@@ -171,6 +178,7 @@ pub fn prepare_downsampling_pipeline(
171178
prefilter,
172179
first_downsample: true,
173180
uniform_scale: bloom.scale == Vec2::ONE,
181+
high_quality: bloom.high_quality,
174182
},
175183
);
176184

crates/bevy_post_process/src/bloom/mod.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ impl ViewNode for BloomNode {
181181
&BindGroupEntries::sequential((
182182
// Read from main texture directly
183183
view_texture,
184-
&bind_groups.sampler,
184+
&downsampling_pipeline_res.sampler,
185185
uniforms.clone(),
186186
)),
187187
);
@@ -361,16 +361,20 @@ fn prepare_bloom_textures(
361361
for (entity, camera, bloom) in &views {
362362
if let Some(viewport) = camera.physical_viewport_size {
363363
// How many times we can halve the resolution minus one so we don't go unnecessarily low
364-
let mip_count = bloom.max_mip_dimension.ilog2().max(2) - 1;
365-
let mip_height_ratio = if viewport.y != 0 {
366-
bloom.max_mip_dimension as f32 / viewport.y as f32
364+
let mip_count = bloom
365+
.max_mip_dimension
366+
.ilog2()
367+
.clamp(2, bloom.max_mip_count)
368+
- 1;
369+
let mip_dim_ratio = if viewport.y != 0 && viewport.x != 0 {
370+
(bloom.max_mip_dimension as f32 / viewport.as_vec2()).max_element()
367371
} else {
368372
0.
369373
};
370374

371375
let texture_descriptor = TextureDescriptor {
372376
label: Some("bloom_texture"),
373-
size: (viewport.as_vec2() * mip_height_ratio)
377+
size: (viewport.as_vec2() * mip_dim_ratio)
374378
.round()
375379
.as_uvec2()
376380
.max(UVec2::ONE)
@@ -419,7 +423,6 @@ struct BloomBindGroups {
419423
cache_key: (TextureId, BufferId),
420424
downsampling_bind_groups: Box<[BindGroup]>,
421425
upsampling_bind_groups: Box<[BindGroup]>,
422-
sampler: Sampler,
423426
}
424427

425428
fn prepare_bloom_bind_groups(
@@ -430,8 +433,6 @@ fn prepare_bloom_bind_groups(
430433
views: Query<(Entity, &BloomTexture, Option<&BloomBindGroups>)>,
431434
uniforms: Res<ComponentUniforms<BloomUniforms>>,
432435
) {
433-
let sampler = &downsampling_pipeline.sampler;
434-
435436
for (entity, bloom_texture, bloom_bind_groups) in &views {
436437
if let Some(b) = bloom_bind_groups
437438
&& b.cache_key
@@ -452,7 +453,7 @@ fn prepare_bloom_bind_groups(
452453
&downsampling_pipeline.bind_group_layout,
453454
&BindGroupEntries::sequential((
454455
&bloom_texture.view(mip - 1),
455-
sampler,
456+
&downsampling_pipeline.sampler,
456457
uniforms.binding().unwrap(),
457458
)),
458459
));
@@ -465,7 +466,7 @@ fn prepare_bloom_bind_groups(
465466
&upsampling_pipeline.bind_group_layout,
466467
&BindGroupEntries::sequential((
467468
&bloom_texture.view(mip),
468-
sampler,
469+
&upsampling_pipeline.sampler,
469470
uniforms.binding().unwrap(),
470471
)),
471472
));
@@ -478,7 +479,6 @@ fn prepare_bloom_bind_groups(
478479
),
479480
downsampling_bind_groups: downsampling_bind_groups.into_boxed_slice(),
480481
upsampling_bind_groups: upsampling_bind_groups.into_boxed_slice(),
481-
sampler: sampler.clone(),
482482
});
483483
}
484484
}

crates/bevy_post_process/src/bloom/settings.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,21 @@ pub struct Bloom {
116116
pub composite_mode: BloomCompositeMode,
117117

118118
/// Maximum size of each dimension for the largest mipchain texture used in downscaling/upscaling.
119-
/// Only tweak if you are seeing visual artifacts.
119+
/// Lower values can improve performance but result in more aliasing.
120120
pub max_mip_dimension: u32,
121121

122+
/// Maximum number of mipmaps to use in downscaling/upscaling (default: [`u32::MAX`]).
123+
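/// Lower values can improve performance but lose some low-frequency contributions.
/// Must be at least 2: the mip count is computed as `max_mip_dimension.ilog2().clamp(2, max_mip_count) - 1`, and `clamp` panics when its upper bound is smaller than its lower bound.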
124+
pub max_mip_count: u32,
125+
122126
/// Amount to stretch the bloom on each axis. Artistic control, can be used to emulate
123127
/// anamorphic blur by using a large x-value. For large values, you may need to increase
124128
/// [`Bloom::max_mip_dimension`] to reduce sampling artifacts.
125129
pub scale: Vec2,
130+
131+
/// Whether to use a high quality bloom implementation (default: true).
132+
/// If false, bloom will use an implementation that significantly reduces the number of texture samples and improves performance, but at the cost of lower quality.
133+
pub high_quality: bool,
126134
}
127135

128136
impl Bloom {
@@ -143,6 +151,8 @@ impl Bloom {
143151
composite_mode: BloomCompositeMode::EnergyConserving,
144152
max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION,
145153
scale: Vec2::ONE,
154+
high_quality: true,
155+
max_mip_count: u32::MAX,
146156
};
147157

148158
/// Emulates the look of stylized anamorphic bloom, stretched horizontally.
@@ -166,6 +176,8 @@ impl Bloom {
166176
composite_mode: BloomCompositeMode::Additive,
167177
max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION,
168178
scale: Vec2::ONE,
179+
high_quality: true,
180+
max_mip_count: u32::MAX,
169181
};
170182

171183
/// A preset that applies a very strong bloom, and blurs the whole screen.
@@ -181,6 +193,8 @@ impl Bloom {
181193
composite_mode: BloomCompositeMode::EnergyConserving,
182194
max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION,
183195
scale: Vec2::ONE,
196+
high_quality: true,
197+
max_mip_count: u32::MAX,
184198
};
185199
}
186200

crates/bevy_post_process/src/bloom/upsampling_pipeline.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub struct UpsamplingPipelineIds {
2929
#[derive(Resource)]
3030
pub struct BloomUpsamplingPipeline {
3131
pub bind_group_layout: BindGroupLayout,
32+
pub sampler: Sampler,
3233
/// The asset handle for the fullscreen vertex shader.
3334
pub fullscreen_shader: FullscreenShader,
3435
/// The fragment shader asset handle.
@@ -39,6 +40,7 @@ pub struct BloomUpsamplingPipeline {
3940
pub struct BloomUpsamplingPipelineKeys {
4041
composite_mode: BloomCompositeMode,
4142
final_pipeline: bool,
43+
high_quality: bool,
4244
}
4345

4446
pub fn init_bloom_upscaling_pipeline(
@@ -62,8 +64,18 @@ pub fn init_bloom_upscaling_pipeline(
6264
),
6365
);
6466

67+
// Sampler
68+
let sampler = render_device.create_sampler(&SamplerDescriptor {
69+
min_filter: FilterMode::Linear,
70+
mag_filter: FilterMode::Linear,
71+
address_mode_u: AddressMode::ClampToEdge,
72+
address_mode_v: AddressMode::ClampToEdge,
73+
..Default::default()
74+
});
75+
6576
commands.insert_resource(BloomUpsamplingPipeline {
6677
bind_group_layout,
78+
sampler,
6779
fullscreen_shader: fullscreen_shader.clone(),
6880
fragment_shader: load_embedded_asset!(asset_server.as_ref(), "bloom.wgsl"),
6981
});
@@ -79,6 +91,11 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline {
7991
BLOOM_TEXTURE_FORMAT
8092
};
8193

94+
let mut shader_defs = vec![];
95+
if !key.high_quality {
96+
shader_defs.push("FAST_BLUR".into());
97+
}
98+
8299
let color_blend = match key.composite_mode {
83100
BloomCompositeMode::EnergyConserving => {
84101
// At the time of developing this we decided to blend our
@@ -117,6 +134,7 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline {
117134
vertex: self.fullscreen_shader.to_vertex_state(),
118135
fragment: Some(FragmentState {
119136
shader: self.fragment_shader.clone(),
137+
shader_defs,
120138
entry_point: Some("upsample".into()),
121139
targets: vec![Some(ColorTargetState {
122140
format: texture_format,
@@ -130,7 +148,6 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline {
130148
}),
131149
write_mask: ColorWrites::ALL,
132150
})],
133-
..default()
134151
}),
135152
..default()
136153
}
@@ -151,6 +168,7 @@ pub fn prepare_upsampling_pipeline(
151168
BloomUpsamplingPipelineKeys {
152169
composite_mode: bloom.composite_mode,
153170
final_pipeline: false,
171+
high_quality: bloom.high_quality,
154172
},
155173
);
156174

@@ -160,6 +178,7 @@ pub fn prepare_upsampling_pipeline(
160178
BloomUpsamplingPipelineKeys {
161179
composite_mode: bloom.composite_mode,
162180
final_pipeline: true,
181+
high_quality: bloom.high_quality,
163182
},
164183
);
165184

crates/bevy_render/src/render_resource/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ pub use wgpu::{
5555
PushConstantRange, RenderPassColorAttachment, RenderPassDepthStencilAttachment,
5656
RenderPassDescriptor, RenderPipelineDescriptor as RawRenderPipelineDescriptor,
5757
Sampler as WgpuSampler, SamplerBindingType, SamplerBindingType as WgpuSamplerBindingType,
58-
SamplerDescriptor, ShaderModule, ShaderModuleDescriptor, ShaderSource, ShaderStages,
59-
StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, StoreOp,
58+
SamplerBorderColor, SamplerDescriptor, ShaderModule, ShaderModuleDescriptor, ShaderSource,
59+
ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, StoreOp,
6060
TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
6161
TextureDescriptor, TextureDimension, TextureFormat, TextureFormatFeatureFlags,
6262
TextureFormatFeatures, TextureSampleType, TextureUsages, TextureView as WgpuTextureView,

examples/2d/bloom_2d.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ fn update_bloom_settings(
110110
bloom.prefilter.threshold_softness
111111
));
112112
text.push_str(&format!("(I/K) Horizontal Scale: {:.2}\n", bloom.scale.x));
113+
text.push_str(&format!("(P) High quality: {}\n", bloom.high_quality));
113114

114115
if keycode.just_pressed(KeyCode::Space) {
115116
commands.entity(camera_entity).remove::<Bloom>();
@@ -180,6 +181,10 @@ fn update_bloom_settings(
180181
bloom.scale.x += dt * 2.0;
181182
}
182183
bloom.scale.x = bloom.scale.x.clamp(0.0, 16.0);
184+
185+
if keycode.just_pressed(KeyCode::KeyP) {
186+
bloom.high_quality = !bloom.high_quality;
187+
}
183188
}
184189

185190
None => {

examples/3d/bloom_3d.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,8 @@ fn update_bloom_settings(
134134
"(U/J) Threshold softness: {:.2}\n",
135135
bloom.prefilter.threshold_softness
136136
));
137-
text.push_str(&format!("(I/K) Horizontal Scale: {:.2}\n", bloom.scale.x));
137+
text.push_str(&format!("(I/K) Horizontal scale: {:.2}\n", bloom.scale.x));
138+
text.push_str(&format!("(P) High quality: {}\n", bloom.high_quality));
138139

139140
if keycode.just_pressed(KeyCode::Space) {
140141
commands.entity(entity).remove::<Bloom>();
@@ -205,6 +206,10 @@ fn update_bloom_settings(
205206
bloom.scale.x += dt * 2.0;
206207
}
207208
bloom.scale.x = bloom.scale.x.clamp(0.0, 8.0);
209+
210+
if keycode.just_pressed(KeyCode::KeyP) {
211+
bloom.high_quality = !bloom.high_quality;
212+
}
208213
}
209214

210215
(entity, None) => {
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
title: Add a faster bloom implementation.
3+
authors: ["@beicause"]
4+
pull_requests: [21340]
5+
---
6+
7+
Bloom now has a `high_quality` option (default: `true`) that controls whether to use the high-quality implementation or a faster but lower-quality one. The lower-quality bloom still maintains reasonable visual quality while significantly reducing the number of texture samples. On low-end devices, this could reduce frame time by a few milliseconds.

0 commit comments

Comments
 (0)