@@ -23,144 +23,207 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323#include "common.hlsl"
2424//====================
2525
26- static const uint g_motion_blur_samples = 32 ;
27- static const float g_velocity_threshold = 0.005f ;
28- static const float g_color_scale = 0.5f ;
29-
30- groupshared uint g_tile_max_velocity_sqr;
31-
32- // find maximum velocity in a 3x3 neighborhood (dilation)
33- // uses SampleLevel with UVs to handle resolution mismatches (e.g. 4K color vs 1080p velocity )
34- float2 get_velocity_dilated ( float2 uv, float2 velocity_texel_size )
26+ static const int SAMPLE_COUNT = 15 ; // loop steps per direction along the velocity; each step takes one forward and one backward tap, so up to 2 * SAMPLE_COUNT taps plus the center sample
27+ static const float MAX_BLUR_RADIUS_PIXELS = 40.0f ; // upper bound, in pixels, applied to the shutter-scaled blur length of the center pixel and of every tap
28+ static const float DEPTH_SCALE = 100.0f ; // multiplier on the depth difference in soft_depth_compare; the weight reaches 0 once depth_a exceeds depth_b by 1 / DEPTH_SCALE
29+ static const float VELOCITY_SOFT_COMPARE = 0.02f ; // velocity softness for edge detection; NOTE(review): not referenced anywhere in the code visible here - confirm it is still needed
30+ static const float CENTER_WEIGHT = 1.0f ; // weight of the center sample; seeds both the color and the weight accumulator
31+ static const float SOFT_Z_EXTENT = 0.1f ; // soft depth extent for bilateral weight; NOTE(review): not referenced in the code visible here (soft_depth_compare uses DEPTH_SCALE) - confirm it is still needed
32+
33+ // helper: soft depth comparison (guerrilla games / killzone approach) - maps the signed difference depth_a - depth_b to a weight in [0, 1]
34+ float soft_depth_compare ( float depth_a, float depth_b )
3535{
36- float2 max_velocity = 0.0f ;
37- float max_len = 0.0f ;
38-
39- [unroll]
40- for (int y = -1 ; y <= 1 ; ++y)
41- {
42- [unroll]
43- for (int x = -1 ; x <= 1 ; ++x)
44- {
45- float2 offset = float2 (x, y) * velocity_texel_size;
46- float2 v = tex_velocity.SampleLevel (samplers[sampler_point_clamp], uv + offset, 0 ).xy;
47- float len = length (v);
48-
49- if (len > max_len)
50- {
51- max_len = len;
52- max_velocity = v;
53- }
54- }
55- }
56- return max_velocity;
36+ // returns 1 when depth_a <= depth_b and falls off linearly to 0 as depth_a exceeds depth_b by up to 1 / DEPTH_SCALE; NOTE(review): whether depth_a <= depth_b means in front or behind depends on the depth convention of the values passed in - confirm against get_linear_depth
37+ float diff = (depth_a - depth_b) * DEPTH_SCALE; // signed difference, scaled so that a gap of 1 / DEPTH_SCALE maps to 1
38+ return saturate (1.0f - diff); // negative diff clamps to 1, diff >= 1 clamps to 0
5739}
5840
59- // simple interleaved gradient noise to reduce banding
60- float get_noise ( uint2 pixel_coord )
41+ // helper: cone weight - samples closer to center contribute more (mcguire approach); returns 1 at distance 0 and falls linearly to 0 at distance_from_center == blur_length
42+ float cone_weight ( float distance_from_center, float blur_length )
6143{
62- float3 magic = float3 (0.06711056f , 0.00583715f , 52.9829189f );
63- return frac (magic.z * frac (dot (float2 (pixel_coord), magic.xy)));
44+ return saturate (1.0f - distance_from_center / (blur_length + FLT_MIN)); // FLT_MIN keeps the divisor non-zero when blur_length is 0 (FLT_MIN is not defined in this file, presumably it comes from common.hlsl)
6445}
6546
66- [ numthreads (THREAD_GROUP_COUNT_X, THREAD_GROUP_COUNT_Y, 1 )]
67- void main_cs ( uint3 thread_id : SV_DispatchThreadID , uint3 group_thread_id : SV_GroupThreadID , uint group_index : SV_GroupIndex )
47+ // helper: cylinder weight - sample contributes based on its own blur coverage; as written this is a triangle: 1 when distance_from_center == sample_blur_length, 0 at distance 0 and at 2 * sample_blur_length or beyond
48+ float cylinder_weight ( float sample_blur_length, float distance_from_center )
6849{
69- // 1. get dimensions of the input color (e.g. 4k)
70- float2 resolution_out;
71- tex.GetDimensions (resolution_out.x, resolution_out.y);
72-
73- // 2. get dimensions of the velocity buffer (e.g. 1080p)
74- float2 resolution_velocity;
75- tex_velocity.GetDimensions (resolution_velocity.x, resolution_velocity.y);
76- float2 velocity_texel_size = 1.0f / resolution_velocity;
77-
78- // setup coordinates
79- uint2 pixel_coord = thread_id.xy;
80- float2 uv = (pixel_coord + 0.5f ) / resolution_out;
81-
82- // 3. get velocities using UVs to handle the resolution mismatch
83- // center uses point sampling to get the exact velocity for this screen area
84- float2 center_velocity = tex_velocity.SampleLevel (samplers[sampler_point_clamp], uv, 0 ).xy;
85- float2 dilated_velocity = get_velocity_dilated (uv, velocity_texel_size);
86-
87- // physically based motion blur strength
88- float shutter_speed = pass_get_f3_value ().x;
89- float shutter_ratio = shutter_speed / (buffer_frame.delta_time + FLT_MIN);
90- float2 center_velocity_uv = (center_velocity / 2.0f ) * shutter_ratio;
91- float2 dilated_velocity_uv = (dilated_velocity / 2.0f ) * shutter_ratio;
92-
93- // compute max velocity for tile
94- if (group_index == 0 )
95- {
96- g_tile_max_velocity_sqr = 0 ;
97- }
98- GroupMemoryBarrierWithGroupSync ();
99-
100- uint velocity_sqr = (uint )(dot (dilated_velocity_uv, dilated_velocity_uv) * 1000000.0f );
101- InterlockedMax (g_tile_max_velocity_sqr, velocity_sqr);
102-
103- GroupMemoryBarrierWithGroupSync ();
104-
105- // early exit if tile has insignificant motion
106- if (sqrt (float (g_tile_max_velocity_sqr) / 1000000.0f ) < g_velocity_threshold)
107- {
108- tex_uav[pixel_coord] = tex.Load (int3 (pixel_coord, 0 ));
109- return ;
110- }
50+ return saturate (1.0f - abs (distance_from_center - sample_blur_length) / (sample_blur_length + FLT_MIN)); // NOTE(review): a tap closer than its own blur length is weighted below 1 (down to 0 at distance 0); a flat-topped cylinder would keep it at 1 - confirm the peaked shape is intended
51+ }
11152
112- // classify pixel (background vs foreground)
113- float center_speed = length (center_velocity_uv);
114- float dilated_speed = length (dilated_velocity_uv);
115- bool is_background = center_speed < (dilated_speed - g_velocity_threshold);
53+ // helper: velocity-to-pixel conversion - scales a velocity by the color target dimensions, component-wise, so it is expressed in color-target pixels
54+ float2 velocity_to_pixels (float2 velocity_uv, float2 resolution_color)
55+ {
56+ // velocity is treated as uv space and converted to pixels; NOTE(review): the removed implementation halved the sampled velocity before using it as a uv offset and this conversion does not - confirm the units stored in tex_velocity
57+ return velocity_uv * resolution_color;
58+ }
11659
117- // early exit if we are static and there is no fast neighbor
118- if (!is_background && center_speed < g_velocity_threshold)
60+ // reconstruction filter for plausible motion blur (based on mcguire 2012) - gathers up to 2 * SAMPLE_COUNT taps along the center velocity direction and returns their weighted average with the center alpha restored; returns the unmodified center color when the center moves less than half a pixel
61+ float4 motion_blur_reconstruction (
62+ float2 uv, // position used for the center color, depth and velocity fetches; taps are offset from it
63+ float2 pixel_coord, // NOTE(review): not read anywhere in this body
64+ float2 resolution_color, // used to convert velocities to pixels and pixel offsets back to uv
65+ float2 resolution_velocity, // NOTE(review): not read anywhere in this body
66+ float shutter_ratio, // multiplier applied to every blur length (center and taps)
67+ float noise // remapped below to a jitter of (noise - 0.5) * 2
68+ )
69+ {
70+ // sample center pixel
71+ float4 center_color = tex.SampleLevel (samplers[sampler_bilinear_clamp], uv, 0 );
72+ float center_depth = get_linear_depth (uv);
73+
74+ // sample velocity at color resolution uv (handles resolution mismatch)
75+ float2 center_velocity_uv = tex_velocity.SampleLevel (samplers[sampler_bilinear_clamp], uv, 0 ).xy;
76+ float2 center_velocity_pixels = velocity_to_pixels (center_velocity_uv, resolution_color);
77+ float center_blur_length_raw = length (center_velocity_pixels);
78+
79+ // early exit for static or nearly static pixels (check raw velocity before shutter scaling); NOTE(review): only the center velocity is tested and the 3x3 dilation of the removed version is gone, so a slow pixel next to a fast mover returns unblurred - confirm this is acceptable
80+ if (center_blur_length_raw < 0.5f )
11981 {
120- tex_uav[pixel_coord] = tex.Load (int3 (pixel_coord, 0 ));
121- return ;
82+ return center_color;
12283 }
123-
124- // reconstruction loop setup
125- float4 center_color = tex.Load (int3 (pixel_coord, 0 ));
126- float center_depth = get_linear_depth (uv);
127- float4 accum_color = center_color;
128- float total_weight = 1.0f ;
129- float2 search_vector = dilated_velocity_uv;
130- float noise = get_noise (pixel_coord);
84+
85+ // apply shutter ratio after early exit check
86+ float center_blur_length = center_blur_length_raw * shutter_ratio;
87+
88+ // clamp blur length for performance (bounds how far the taps reach from the center)
89+ float clamped_blur_length = min (center_blur_length, MAX_BLUR_RADIUS_PIXELS);
90+
91+ // normalized velocity direction (use raw velocity for direction, independent of shutter)
92+ float2 velocity_dir = center_velocity_pixels / (center_blur_length_raw + FLT_MIN);
93+
94+ // accumulation - both sums start with the center sample at CENTER_WEIGHT
95+ float4 color_accum = center_color * CENTER_WEIGHT;
96+ float weight_accum = CENTER_WEIGHT;
97+
98+ // jittered sample offset for temporal stability (intended to work together with taa); the result lies in [-1, 1] assuming noise is in [0, 1] - TODO confirm the range of the noise source
99+ float jitter = (noise - 0.5f ) * 2.0f ;
100+
101+ // sample along velocity direction (both forward and backward)
131102 [unroll]
132- for (uint i = 1 ; i < g_motion_blur_samples ; ++i)
103+ for (int i = 1 ; i <= SAMPLE_COUNT ; ++i)
133104 {
134- float t = (float (i) + noise) / float (g_motion_blur_samples) - 0.5f ;
135- float2 sample_uv = uv + search_vector * t;
136-
137- float is_on_screen = step (0.0f , sample_uv.x) * step (sample_uv.x, 1.0f ) * step (0.0f , sample_uv.y) * step (sample_uv.y, 1.0f );
105+ // non-linear sample distribution - more samples near center for smooth gradients
106+ float t = (float )i / (float )SAMPLE_COUNT;
107+ t = t * t; // quadratic distribution concentrates samples near center
138108
139- // bilinear sample for color (smoothness)
140- float4 sample_color = tex.SampleLevel (samplers[sampler_bilinear_clamp], sample_uv, 0 );
141- float sample_depth = get_linear_depth (sample_uv);
142-
143- // depth weighting logic
144- float is_foreground_sample = step (sample_depth, center_depth - 0.01f );
145- float depth_weight = 1.0f ;
146-
147- if (is_background)
109+ // add temporal jitter for smooth accumulation across frames; NOTE(review): the jitter amplitude is one uniform step (clamped_blur_length / SAMPLE_COUNT), larger than the quadratic spacing of the first taps, so with negative jitter the taps for i = 1..3 can go negative and are clamped to distance 0 below
110+ float sample_distance = t * clamped_blur_length + jitter * (clamped_blur_length / (float )SAMPLE_COUNT);
111+ sample_distance = max (sample_distance, 0.0f );
112+
113+ // sample positions in both directions (pixel offset divided by resolution_color to get a uv offset)
114+ float2 offset = velocity_dir * sample_distance / resolution_color;
115+ float2 uv_forward = uv + offset;
116+ float2 uv_backward = uv - offset;
117+
118+ // forward sample (skipped entirely when is_valid_uv rejects the position)
119+ if (is_valid_uv (uv_forward))
148120 {
149- depth_weight = is_foreground_sample;
121+ float4 sample_color = tex.SampleLevel (samplers[sampler_bilinear_clamp], uv_forward, 0 );
122+ float sample_depth = get_linear_depth (uv_forward);
123+ float2 sample_velocity_uv = tex_velocity.SampleLevel (samplers[sampler_bilinear_clamp], uv_forward, 0 ).xy;
124+ float2 sample_velocity_px = velocity_to_pixels (sample_velocity_uv, resolution_color);
125+ float sample_blur_length = min (length (sample_velocity_px) * shutter_ratio, MAX_BLUR_RADIUS_PIXELS);
126+
127+ // bilateral weights based on depth relationship (guerrilla games approach)
128+ // the two weights below are the same comparison with the arguments swapped; which one represents a foreground tap depends on the depth convention - see the notes on each line
129+ float depth_weight_forward = soft_depth_compare (center_depth, sample_depth); // 1 when center_depth <= sample_depth; NOTE(review): previously labelled sample in front, which only holds if larger depth values are nearer - confirm against get_linear_depth
130+ float depth_weight_backward = soft_depth_compare (sample_depth, center_depth); // 1 when sample_depth <= center_depth; NOTE(review): previously labelled center in front - same caveat
131+
132+ // velocity-based weights (mcguire reconstruction filter) - both are driven by the blur length of the tap, not of the center
133+ float cone = cone_weight (sample_distance, sample_blur_length);
134+ float cylinder = cylinder_weight (sample_blur_length, sample_distance);
135+
136+ // combine weights, then scale by the ratio of the tap blur length to the center blur length (saturated to at most 1), so slower taps contribute less
137+ float weight = (depth_weight_forward * cone + depth_weight_backward * cylinder) *
138+ saturate (sample_blur_length / (clamped_blur_length + FLT_MIN));
139+
140+ // soft falloff at the edges for smoother appearance; NOTE(review): the factor is 0 at t == 1, so the taps for i == SAMPLE_COUNT are fetched but always weighted 0
141+ weight *= smoothstep (0.0f , 0.1f , 1.0f - t);
142+
143+ color_accum += sample_color * weight;
144+ weight_accum += weight;
150145 }
151- else
146+
147+ // backward sample - same weighting as the forward tap, evaluated at uv - offset
148+ if (is_valid_uv (uv_backward))
152149 {
153- float depth_diff = abs (center_depth - sample_depth);
154- depth_weight = exp (-depth_diff);
150+ float4 sample_color = tex.SampleLevel (samplers[sampler_bilinear_clamp], uv_backward, 0 );
151+ float sample_depth = get_linear_depth (uv_backward);
152+ float2 sample_velocity_uv = tex_velocity.SampleLevel (samplers[sampler_bilinear_clamp], uv_backward, 0 ).xy;
153+ float2 sample_velocity_px = velocity_to_pixels (sample_velocity_uv, resolution_color);
154+ float sample_blur_length = min (length (sample_velocity_px) * shutter_ratio, MAX_BLUR_RADIUS_PIXELS);
155+
156+ // bilateral weights (forward / backward here name the argument order of the depth comparison, not the tap direction)
157+ float depth_weight_forward = soft_depth_compare (center_depth, sample_depth); // 1 when center_depth <= sample_depth
158+ float depth_weight_backward = soft_depth_compare (sample_depth, center_depth); // 1 when sample_depth <= center_depth
159+
160+ // velocity weights
161+ float cone = cone_weight (sample_distance, sample_blur_length);
162+ float cylinder = cylinder_weight (sample_blur_length, sample_distance);
163+
164+ float weight = (depth_weight_forward * cone + depth_weight_backward * cylinder) *
165+ saturate (sample_blur_length / (clamped_blur_length + FLT_MIN));
166+
167+ weight *= smoothstep (0.0f , 0.1f , 1.0f - t); // same edge falloff as the forward tap (0 at t == 1)
168+
169+ color_accum += sample_color * weight;
170+ weight_accum += weight;
155171 }
156-
157- float color_diff = length (center_color.rgb - sample_color.rgb);
158- float color_weight = exp (-color_diff * g_color_scale);
159-
160- float weight = is_on_screen * depth_weight * color_weight;
161- accum_color += sample_color * weight;
162- total_weight += weight;
163172 }
173+
174+ // normalize (weight_accum is at least CENTER_WEIGHT, FLT_MIN only guards a zero CENTER_WEIGHT)
175+ float4 result = color_accum / (weight_accum + FLT_MIN);
176+
177+ // preserve alpha channel - the blurred alpha is discarded in favour of the center alpha
178+ result.a = center_color.a;
179+
180+ return result;
181+ }
164182
165- tex_uav[pixel_coord] = accum_color / total_weight;
183+ [numthreads (THREAD_GROUP_COUNT_X, THREAD_GROUP_COUNT_Y, 1 )] // compute entry point: each thread handles the pixel at thread_id.xy and writes one blurred texel to tex_uav
184+ void main_cs (uint3 thread_id : SV_DispatchThreadID , uint3 group_thread_id : SV_GroupThreadID , uint group_index : SV_GroupIndex ) // NOTE(review): group_thread_id and group_index are not read in this body
185+ {
186+ // get buffer dimensions (color/output can be different from velocity)
187+ float2 resolution_color;
188+ tex.GetDimensions (resolution_color.x, resolution_color.y);
189+
190+ float2 resolution_output;
191+ tex_uav.GetDimensions (resolution_output.x, resolution_output.y);
192+
193+ float2 resolution_velocity;
194+ tex_velocity.GetDimensions (resolution_velocity.x, resolution_velocity.y);
195+
196+ // compute pixel coordinate and uv (texel center, normalized by the output size)
197+ uint2 pixel_coord = thread_id.xy;
198+ float2 uv = (pixel_coord + 0.5f ) / resolution_output;
199+
200+ // early exit for out of bounds (threads whose coordinate falls outside tex_uav write nothing)
201+ if (any (pixel_coord >= uint2 (resolution_output)))
202+ {
203+ return ;
204+ }
205+
206+ // get shutter parameters for physically-based blur
207+ // shutter_ratio is shutter_speed divided by the frame delta time, i.e. how much of the frame motion is captured
208+ // - ratio < 1: fast shutter, freezes motion (e.g. 1/125s at 60fps = ~0.48)
209+ // - ratio = 1: shutter matches frame time, standard blur
210+ // - ratio > 1: slow shutter, motion spans multiple frames (e.g. 1/30s at 60fps = 2.0)
211+ float shutter_speed = pass_get_f3_value ().x;
212+ float shutter_ratio = clamp (shutter_speed / (buffer_frame.delta_time + FLT_MIN), 0.0f , 3.0f ); // FLT_MIN avoids a divide by zero; the ratio is capped at 3
213+
214+ // generate per-pixel noise used to jitter the tap distances
215+ // NOTE(review): the second argument (true) presumably enables per-frame variation so the pattern works with taa - confirm against noise_interleaved_gradient
216+ float noise = noise_interleaved_gradient (float2 (pixel_coord), true );
217+
218+ // perform motion blur reconstruction (pixel_coord and resolution_velocity are passed through but not used by the callee)
219+ float4 result = motion_blur_reconstruction (
220+ uv,
221+ float2 (pixel_coord),
222+ resolution_color,
223+ resolution_velocity,
224+ shutter_ratio,
225+ noise
226+ );
227+
228+ tex_uav[pixel_coord] = result;
166229}
0 commit comments