diff --git a/crates/bevy_asset/src/io/source.rs b/crates/bevy_asset/src/io/source.rs index 500a1657f353c..8cca30b885345 100644 --- a/crates/bevy_asset/src/io/source.rs +++ b/crates/bevy_asset/src/io/source.rs @@ -12,7 +12,7 @@ use bevy_ecs::resource::Resource; use bevy_platform::collections::HashMap; use core::{fmt::Display, hash::Hash, time::Duration}; use thiserror::Error; -use tracing::{error, warn}; +use tracing::warn; use super::{ErasedAssetReader, ErasedAssetWriter}; diff --git a/crates/bevy_ecs/src/schedule/schedule.rs b/crates/bevy_ecs/src/schedule/schedule.rs index 87a91c86ae568..e3daedae58631 100644 --- a/crates/bevy_ecs/src/schedule/schedule.rs +++ b/crates/bevy_ecs/src/schedule/schedule.rs @@ -17,7 +17,7 @@ use core::{ fmt::{Debug, Write}, }; use fixedbitset::FixedBitSet; -use log::{error, info, warn}; +use log::{info, warn}; use pass::ScheduleBuildPassObj; use thiserror::Error; #[cfg(feature = "trace")] diff --git a/crates/bevy_image/src/dynamic_texture_atlas_builder.rs b/crates/bevy_image/src/dynamic_texture_atlas_builder.rs index 016ae96454666..54a45d10d1834 100644 --- a/crates/bevy_image/src/dynamic_texture_atlas_builder.rs +++ b/crates/bevy_image/src/dynamic_texture_atlas_builder.rs @@ -3,7 +3,6 @@ use bevy_asset::RenderAssetUsages; use bevy_math::{URect, UVec2}; use guillotiere::{size2, Allocation, AtlasAllocator}; use thiserror::Error; -use tracing::error; /// An error produced by [`DynamicTextureAtlasBuilder`] when trying to add a new /// texture to a [`TextureAtlasLayout`]. 
diff --git a/crates/bevy_post_process/src/bloom/bloom.wgsl b/crates/bevy_post_process/src/bloom/bloom.wgsl index aa4a2f94c46a2..f149f774afc36 100644 --- a/crates/bevy_post_process/src/bloom/bloom.wgsl +++ b/crates/bevy_post_process/src/bloom/bloom.wgsl @@ -49,6 +49,43 @@ fn karis_average(color: vec3) -> f32 { return 1.0 / (1.0 + luma); } +// https://www.shadertoy.com/view/mdsyDf +#ifdef FAST_BLUR +fn bloom_down_kernel4(uv: vec2) -> vec3 { + let ps = uniforms.scale / vec2(textureDimensions(input_texture)); + let o = 0.5 + 1.0 / 4.0; + + let a = textureSample(input_texture, s, uv + vec2(-1.0, -1.0) * o * ps).rgb * 0.25; + let b = textureSample(input_texture, s, uv + vec2(1.0, -1.0) * o * ps).rgb * 0.25; + let c = textureSample(input_texture, s, uv + vec2(-1.0, 1.0) * o * ps).rgb * 0.25; + let d = textureSample(input_texture, s, uv + vec2(1.0, 1.0) * o * ps).rgb * 0.25; + +#ifdef FIRST_DOWNSAMPLE + return (a + b + c + d) * karis_average(a + b + c + d); +#else + return a + b + c + d; +#endif +} + +fn bloom_up_kernel4(uv: vec2) -> vec3 { + // Modified version of BloomUpKernel4B https://www.shadertoy.com/view/mdsyDf. I couldn't get a good result with the original version. + let ps = uniforms.scale / vec2(textureDimensions(input_texture)); + let w = vec4(0.211029, 0.288971, 0.288971, 0.211029); + // Add a small offset for better radial symmetry. 
+ let l00 = vec2(0.347209, 0.526425) + 0.1; + let l10 = vec2(0.109840, 0.334045) + 0.1; + let l01 = vec2(0.334045, 0.109840) + 0.1; + let l11 = vec2(0.526425, 0.347209) + 0.1; + + let a = textureSample(input_texture, s, uv + (vec2( -0.5, -1.5) + l00) * ps).rgb * w.x; + let b = textureSample(input_texture, s, uv + (vec2(0.5, -0.5) + l10) * ps).rgb * w.y; + let c = textureSample(input_texture, s, uv + (vec2( -0.5, 0.5) + l01) * ps).rgb * w.z; + let d = textureSample(input_texture, s, uv + (vec2( -1.5,-0.5) + l11) * ps).rgb * w.w; + + return a + b + c + d; +} +#endif + // [COD] slide 153 fn sample_input_13_tap(uv: vec2) -> vec3 { #ifdef UNIFORM_SCALE @@ -161,7 +198,11 @@ fn sample_input_3x3_tent(uv: vec2) -> vec3 { @fragment fn downsample_first(@location(0) output_uv: vec2) -> @location(0) vec4 { let sample_uv = uniforms.viewport.xy + output_uv * uniforms.viewport.zw; +#ifdef FAST_BLUR + var sample = bloom_down_kernel4(sample_uv); +#else var sample = sample_input_13_tap(sample_uv); +#endif // Lower bound of 0.0001 is to avoid propagating multiplying by 0.0 through the // downscaling and upscaling which would result in black boxes. // The upper bound is to prevent NaNs. 
@@ -178,10 +219,18 @@ fn downsample_first(@location(0) output_uv: vec2) -> @location(0) vec4 @fragment fn downsample(@location(0) uv: vec2) -> @location(0) vec4 { +#ifdef FAST_BLUR + return vec4(bloom_down_kernel4(uv), 1.0); +#else return vec4(sample_input_13_tap(uv), 1.0); +#endif } @fragment fn upsample(@location(0) uv: vec2) -> @location(0) vec4 { +#ifdef FAST_BLUR + return vec4(bloom_up_kernel4(uv), 1.0); +#else return vec4(sample_input_3x3_tent(uv), 1.0); +#endif } diff --git a/crates/bevy_post_process/src/bloom/downsampling_pipeline.rs b/crates/bevy_post_process/src/bloom/downsampling_pipeline.rs index 2e66e1d25dbaf..ba893df6b940c 100644 --- a/crates/bevy_post_process/src/bloom/downsampling_pipeline.rs +++ b/crates/bevy_post_process/src/bloom/downsampling_pipeline.rs @@ -40,6 +40,7 @@ pub struct BloomDownsamplingPipelineKeys { prefilter: bool, first_downsample: bool, uniform_scale: bool, + high_quality: bool, } /// The uniform struct extracted from [`Bloom`] attached to a Camera. @@ -79,8 +80,9 @@ pub fn init_bloom_downsampling_pipeline( let sampler = render_device.create_sampler(&SamplerDescriptor { min_filter: FilterMode::Linear, mag_filter: FilterMode::Linear, - address_mode_u: AddressMode::ClampToEdge, - address_mode_v: AddressMode::ClampToEdge, + address_mode_u: AddressMode::ClampToBorder, + address_mode_v: AddressMode::ClampToBorder, + border_color: Some(SamplerBorderColor::TransparentBlack), ..Default::default() }); @@ -110,6 +112,10 @@ impl SpecializedRenderPipeline for BloomDownsamplingPipeline { shader_defs.push("FIRST_DOWNSAMPLE".into()); } + if !key.high_quality { + shader_defs.push("FAST_BLUR".into()); + } + if key.prefilter { shader_defs.push("USE_THRESHOLD".into()); } @@ -161,6 +167,7 @@ pub fn prepare_downsampling_pipeline( prefilter, first_downsample: false, uniform_scale: bloom.scale == Vec2::ONE, + high_quality: bloom.high_quality, }, ); @@ -171,6 +178,7 @@ pub fn prepare_downsampling_pipeline( prefilter, first_downsample: true, 
uniform_scale: bloom.scale == Vec2::ONE, + high_quality: bloom.high_quality, }, ); diff --git a/crates/bevy_post_process/src/bloom/mod.rs b/crates/bevy_post_process/src/bloom/mod.rs index fe22cf6d8365d..49a20719f3cfa 100644 --- a/crates/bevy_post_process/src/bloom/mod.rs +++ b/crates/bevy_post_process/src/bloom/mod.rs @@ -181,7 +181,7 @@ impl ViewNode for BloomNode { &BindGroupEntries::sequential(( // Read from main texture directly view_texture, - &bind_groups.sampler, + &downsampling_pipeline_res.sampler, uniforms.clone(), )), ); @@ -361,16 +361,20 @@ fn prepare_bloom_textures( for (entity, camera, bloom) in &views { if let Some(viewport) = camera.physical_viewport_size { // How many times we can halve the resolution minus one so we don't go unnecessarily low - let mip_count = bloom.max_mip_dimension.ilog2().max(2) - 1; - let mip_height_ratio = if viewport.y != 0 { - bloom.max_mip_dimension as f32 / viewport.y as f32 + let mip_count = bloom + .max_mip_dimension + .ilog2() + .clamp(2, bloom.max_mip_count.max(2)) // `clamp` panics if min > max, so keep the upper bound >= 2 + - 1; + let mip_dim_ratio = if viewport.y != 0 && viewport.x != 0 { + (bloom.max_mip_dimension as f32 / viewport.as_vec2()).max_element() } else { 0. 
}; let texture_descriptor = TextureDescriptor { label: Some("bloom_texture"), - size: (viewport.as_vec2() * mip_height_ratio) + size: (viewport.as_vec2() * mip_dim_ratio) .round() .as_uvec2() .max(UVec2::ONE) @@ -419,7 +423,6 @@ struct BloomBindGroups { cache_key: (TextureId, BufferId), downsampling_bind_groups: Box<[BindGroup]>, upsampling_bind_groups: Box<[BindGroup]>, - sampler: Sampler, } fn prepare_bloom_bind_groups( @@ -430,8 +433,6 @@ fn prepare_bloom_bind_groups( views: Query<(Entity, &BloomTexture, Option<&BloomBindGroups>)>, uniforms: Res>, ) { - let sampler = &downsampling_pipeline.sampler; - for (entity, bloom_texture, bloom_bind_groups) in &views { if let Some(b) = bloom_bind_groups && b.cache_key @@ -452,7 +453,7 @@ fn prepare_bloom_bind_groups( &downsampling_pipeline.bind_group_layout, &BindGroupEntries::sequential(( &bloom_texture.view(mip - 1), - sampler, + &downsampling_pipeline.sampler, uniforms.binding().unwrap(), )), )); @@ -465,7 +466,7 @@ fn prepare_bloom_bind_groups( &upsampling_pipeline.bind_group_layout, &BindGroupEntries::sequential(( &bloom_texture.view(mip), - sampler, + &upsampling_pipeline.sampler, uniforms.binding().unwrap(), )), )); @@ -478,7 +479,6 @@ fn prepare_bloom_bind_groups( ), downsampling_bind_groups: downsampling_bind_groups.into_boxed_slice(), upsampling_bind_groups: upsampling_bind_groups.into_boxed_slice(), - sampler: sampler.clone(), }); } } diff --git a/crates/bevy_post_process/src/bloom/settings.rs b/crates/bevy_post_process/src/bloom/settings.rs index 39457dac8efd2..926470cd4fdfd 100644 --- a/crates/bevy_post_process/src/bloom/settings.rs +++ b/crates/bevy_post_process/src/bloom/settings.rs @@ -116,13 +116,21 @@ pub struct Bloom { pub composite_mode: BloomCompositeMode, /// Maximum size of each dimension for the largest mipchain texture used in downscaling/upscaling. - /// Only tweak if you are seeing visual artifacts. + /// Lower values can improve performance but result in more aliasing. 
pub max_mip_dimension: u32, + /// Maximum number of mipmaps to use in downscaling/upscaling (default: [`u32::MAX`]). + /// Lower values can improve performance but lose some low frequency contributions. + pub max_mip_count: u32, + /// Amount to stretch the bloom on each axis. Artistic control, can be used to emulate /// anamorphic blur by using a large x-value. For large values, you may need to increase /// [`Bloom::max_mip_dimension`] to reduce sampling artifacts. pub scale: Vec2, + + /// Whether to use a high-quality bloom implementation (default: `true`). + /// If `false`, bloom uses an implementation that significantly reduces the number of texture samples and improves performance, at the cost of lower quality. + pub high_quality: bool, } impl Bloom { @@ -143,6 +151,8 @@ impl Bloom { composite_mode: BloomCompositeMode::EnergyConserving, max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION, scale: Vec2::ONE, + high_quality: true, + max_mip_count: u32::MAX, }; /// Emulates the look of stylized anamorphic bloom, stretched horizontally. @@ -166,6 +176,8 @@ impl Bloom { composite_mode: BloomCompositeMode::Additive, max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION, scale: Vec2::ONE, + high_quality: true, + max_mip_count: u32::MAX, }; /// A preset that applies a very strong bloom, and blurs the whole screen. 
@@ -181,6 +193,8 @@ impl Bloom { composite_mode: BloomCompositeMode::EnergyConserving, max_mip_dimension: Self::DEFAULT_MAX_MIP_DIMENSION, scale: Vec2::ONE, + high_quality: true, + max_mip_count: u32::MAX, }; } diff --git a/crates/bevy_post_process/src/bloom/upsampling_pipeline.rs b/crates/bevy_post_process/src/bloom/upsampling_pipeline.rs index 775ca55fb3529..09cb9030173ae 100644 --- a/crates/bevy_post_process/src/bloom/upsampling_pipeline.rs +++ b/crates/bevy_post_process/src/bloom/upsampling_pipeline.rs @@ -29,6 +29,7 @@ pub struct UpsamplingPipelineIds { #[derive(Resource)] pub struct BloomUpsamplingPipeline { pub bind_group_layout: BindGroupLayout, + pub sampler: Sampler, /// The asset handle for the fullscreen vertex shader. pub fullscreen_shader: FullscreenShader, /// The fragment shader asset handle. @@ -39,6 +40,7 @@ pub struct BloomUpsamplingPipeline { pub struct BloomUpsamplingPipelineKeys { composite_mode: BloomCompositeMode, final_pipeline: bool, + high_quality: bool, } pub fn init_bloom_upscaling_pipeline( @@ -62,8 +64,18 @@ pub fn init_bloom_upscaling_pipeline( ), ); + // Sampler + let sampler = render_device.create_sampler(&SamplerDescriptor { + min_filter: FilterMode::Linear, + mag_filter: FilterMode::Linear, + address_mode_u: AddressMode::ClampToEdge, + address_mode_v: AddressMode::ClampToEdge, + ..Default::default() + }); + commands.insert_resource(BloomUpsamplingPipeline { bind_group_layout, + sampler, fullscreen_shader: fullscreen_shader.clone(), fragment_shader: load_embedded_asset!(asset_server.as_ref(), "bloom.wgsl"), }); @@ -79,6 +91,11 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline { BLOOM_TEXTURE_FORMAT }; + let mut shader_defs = vec![]; + if !key.high_quality { + shader_defs.push("FAST_BLUR".into()); + } + let color_blend = match key.composite_mode { BloomCompositeMode::EnergyConserving => { // At the time of developing this we decided to blend our @@ -117,6 +134,7 @@ impl SpecializedRenderPipeline for 
BloomUpsamplingPipeline { vertex: self.fullscreen_shader.to_vertex_state(), fragment: Some(FragmentState { shader: self.fragment_shader.clone(), + shader_defs, entry_point: Some("upsample".into()), targets: vec![Some(ColorTargetState { format: texture_format, @@ -130,7 +148,6 @@ impl SpecializedRenderPipeline for BloomUpsamplingPipeline { }), write_mask: ColorWrites::ALL, })], - ..default() }), ..default() } @@ -151,6 +168,7 @@ pub fn prepare_upsampling_pipeline( BloomUpsamplingPipelineKeys { composite_mode: bloom.composite_mode, final_pipeline: false, + high_quality: bloom.high_quality, }, ); @@ -160,6 +178,7 @@ pub fn prepare_upsampling_pipeline( BloomUpsamplingPipelineKeys { composite_mode: bloom.composite_mode, final_pipeline: true, + high_quality: bloom.high_quality, }, ); diff --git a/crates/bevy_render/src/render_resource/mod.rs b/crates/bevy_render/src/render_resource/mod.rs index 0a41dfdd17ad9..5d6d281be934b 100644 --- a/crates/bevy_render/src/render_resource/mod.rs +++ b/crates/bevy_render/src/render_resource/mod.rs @@ -55,8 +55,8 @@ pub use wgpu::{ PushConstantRange, RenderPassColorAttachment, RenderPassDepthStencilAttachment, RenderPassDescriptor, RenderPipelineDescriptor as RawRenderPipelineDescriptor, Sampler as WgpuSampler, SamplerBindingType, SamplerBindingType as WgpuSamplerBindingType, - SamplerDescriptor, ShaderModule, ShaderModuleDescriptor, ShaderSource, ShaderStages, - StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, StoreOp, + SamplerBorderColor, SamplerDescriptor, ShaderModule, ShaderModuleDescriptor, ShaderSource, + ShaderStages, StencilFaceState, StencilOperation, StencilState, StorageTextureAccess, StoreOp, TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect, TextureDescriptor, TextureDimension, TextureFormat, TextureFormatFeatureFlags, TextureFormatFeatures, TextureSampleType, TextureUsages, TextureView as WgpuTextureView, diff --git a/crates/bevy_render/src/view/window/screenshot.rs 
b/crates/bevy_render/src/view/window/screenshot.rs index 4418c7d5d74d2..648988698aaf5 100644 --- a/crates/bevy_render/src/view/window/screenshot.rs +++ b/crates/bevy_render/src/view/window/screenshot.rs @@ -644,8 +644,8 @@ pub(crate) fn collect_screenshots(world: &mut World) { // The polling for this map call is done every frame when the command queue is submitted. buffer_slice.map_async(wgpu::MapMode::Read, move |result| { let err = result.err(); - if err.is_some() { - panic!("{}", err.unwrap().to_string()); + if let Some(e) = err { + panic!("{e}"); } tx.try_send(()).unwrap(); }); diff --git a/crates/bevy_shader/src/shader_cache.rs b/crates/bevy_shader/src/shader_cache.rs index 77a004ecebb8e..c9f8c068bfb26 100644 --- a/crates/bevy_shader/src/shader_cache.rs +++ b/crates/bevy_shader/src/shader_cache.rs @@ -5,7 +5,7 @@ use bevy_platform::collections::{hash_map::EntryRef, HashMap, HashSet}; use core::hash::Hash; use naga::valid::Capabilities; use thiserror::Error; -use tracing::{debug, error}; +use tracing::debug; use wgpu_types::{DownlevelFlags, Features}; /// Source of a shader module. 
diff --git a/examples/2d/bloom_2d.rs b/examples/2d/bloom_2d.rs index c06952bfa4009..48f2eda300815 100644 --- a/examples/2d/bloom_2d.rs +++ b/examples/2d/bloom_2d.rs @@ -110,6 +110,7 @@ fn update_bloom_settings( bloom.prefilter.threshold_softness )); text.push_str(&format!("(I/K) Horizontal Scale: {:.2}\n", bloom.scale.x)); + text.push_str(&format!("(P) High quality: {}\n", bloom.high_quality)); if keycode.just_pressed(KeyCode::Space) { commands.entity(camera_entity).remove::(); @@ -180,6 +181,10 @@ fn update_bloom_settings( bloom.scale.x += dt * 2.0; } bloom.scale.x = bloom.scale.x.clamp(0.0, 16.0); + + if keycode.just_pressed(KeyCode::KeyP) { + bloom.high_quality = !bloom.high_quality; + } } None => { diff --git a/examples/3d/bloom_3d.rs b/examples/3d/bloom_3d.rs index 982b59745fd45..87705a323f187 100644 --- a/examples/3d/bloom_3d.rs +++ b/examples/3d/bloom_3d.rs @@ -134,7 +134,8 @@ fn update_bloom_settings( "(U/J) Threshold softness: {:.2}\n", bloom.prefilter.threshold_softness )); - text.push_str(&format!("(I/K) Horizontal Scale: {:.2}\n", bloom.scale.x)); + text.push_str(&format!("(I/K) Horizontal scale: {:.2}\n", bloom.scale.x)); + text.push_str(&format!("(P) High quality: {}\n", bloom.high_quality)); if keycode.just_pressed(KeyCode::Space) { commands.entity(entity).remove::(); @@ -205,6 +206,10 @@ fn update_bloom_settings( bloom.scale.x += dt * 2.0; } bloom.scale.x = bloom.scale.x.clamp(0.0, 8.0); + + if keycode.just_pressed(KeyCode::KeyP) { + bloom.high_quality = !bloom.high_quality; + } } (entity, None) => { diff --git a/release-content/release-notes/faster_bloom.md b/release-content/release-notes/faster_bloom.md new file mode 100644 index 0000000000000..0dd557c72d469 --- /dev/null +++ b/release-content/release-notes/faster_bloom.md @@ -0,0 +1,7 @@ +--- +title: Add a faster bloom implementation. 
+authors: ["@beicause"] +pull_requests: [21340] +--- + +Bloom now has a `high_quality` option (default: `true`) that selects between the high-quality implementation and a faster, lower-quality one. The faster implementation still maintains reasonable visual quality while significantly reducing the number of texture samples. On low-end devices, this can reduce frame time by a few milliseconds.