Skip to content

Commit 271ae4a

Browse files
committed
[motionblur] brand new, top of the line
1 parent 98f0b7b commit 271ae4a

File tree

1 file changed

+183
-120
lines changed

1 file changed

+183
-120
lines changed

data/shaders/motion_blur.hlsl

Lines changed: 183 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -23,144 +23,207 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323
#include "common.hlsl"
2424
//====================
2525

26-
static const uint g_motion_blur_samples = 32;
27-
static const float g_velocity_threshold = 0.005f;
28-
static const float g_color_scale = 0.5f;
29-
30-
groupshared uint g_tile_max_velocity_sqr;
31-
32-
// find maximum velocity in a 3x3 neighborhood (dilation)
33-
// uses SampleLevel with UVs to handle resolution mismatches (e.g. 4K color vs 1080p velocity)
34-
float2 get_velocity_dilated(float2 uv, float2 velocity_texel_size)
26+
static const int SAMPLE_COUNT = 15; // taps taken on each side of the center along the velocity (up to 2 * count taps plus the center)
27+
static const float MAX_BLUR_RADIUS_PIXELS = 40.0f; // upper bound applied to every blur length, in pixels
28+
static const float DEPTH_SCALE = 100.0f; // multiplier on the depth difference in soft_depth_compare (larger = sharper transition)
29+
static const float VELOCITY_SOFT_COMPARE = 0.02f; // velocity softness for edge detection - NOTE(review): not referenced in the visible shader code
30+
static const float CENTER_WEIGHT = 1.0f; // weight of the center sample in the accumulation
31+
static const float SOFT_Z_EXTENT = 0.1f; // soft depth extent for bilateral weight - NOTE(review): not referenced in the visible shader code (soft_depth_compare uses DEPTH_SCALE)
32+
33+
// helper: soft depth comparison (guerrilla games / killzone approach)
// depth_a, depth_b - the two depths to compare (callers pass get_linear_depth results)
// returns a value in [0, 1]: saturate(1 - (depth_a - depth_b) * DEPTH_SCALE)
34+
float soft_depth_compare(float depth_a, float depth_b)
3535
{
36-
float2 max_velocity = 0.0f;
37-
float max_len = 0.0f;
38-
39-
[unroll]
40-
for (int y = -1; y <= 1; ++y)
41-
{
42-
[unroll]
43-
for (int x = -1; x <= 1; ++x)
44-
{
45-
float2 offset = float2(x, y) * velocity_texel_size;
46-
float2 v = tex_velocity.SampleLevel(samplers[sampler_point_clamp], uv + offset, 0).xy;
47-
float len = length(v);
48-
49-
if (len > max_len)
50-
{
51-
max_len = len;
52-
max_velocity = v;
53-
}
54-
}
55-
}
56-
return max_velocity;
36+
// returns 1 when depth_a <= depth_b, falling linearly to 0 once depth_a exceeds depth_b by 1 / DEPTH_SCALE
// NOTE(review): if get_linear_depth grows with distance, "depth_a <= depth_b" means depth_a is nearer, not behind - confirm the convention
37+
float diff = (depth_a - depth_b) * DEPTH_SCALE;
38+
return saturate(1.0f - diff);
5739
}
5840

59-
// simple interleaved gradient noise to reduce banding
60-
float get_noise(uint2 pixel_coord)
41+
// helper: cone weight - samples closer to center contribute more (mcguire approach)
// distance_from_center - distance of the tap from the shaded pixel
// blur_length          - blur length the distance is measured against (same unit as the distance)
// returns 1 at distance 0, falling linearly to 0 at distance >= blur_length
// FLT_MIN (defined outside this view) is added to the divisor so a zero blur_length does not divide by zero
42+
float cone_weight(float distance_from_center, float blur_length)
6143
{
62-
float3 magic = float3(0.06711056f, 0.00583715f, 52.9829189f);
63-
return frac(magic.z * frac(dot(float2(pixel_coord), magic.xy)));
44+
return saturate(1.0f - distance_from_center / (blur_length + FLT_MIN));
6445
}
6546

66-
[numthreads(THREAD_GROUP_COUNT_X, THREAD_GROUP_COUNT_Y, 1)]
67-
void main_cs(uint3 thread_id : SV_DispatchThreadID, uint3 group_thread_id : SV_GroupThreadID, uint group_index : SV_GroupIndex)
47+
// helper: cylinder weight - sample contributes based on its own blur coverage
// sample_blur_length   - blur length of the tap being weighted
// distance_from_center - distance of the tap from the shaded pixel (same unit as the blur length)
// returns saturate(1 - |distance - length| / length): 1 when the distance equals the tap's blur length,
// falling linearly to 0 at distance 0 and at twice the blur length or beyond
// FLT_MIN (defined outside this view) is added to the divisor so a zero blur length does not divide by zero
48+
float cylinder_weight(float sample_blur_length, float distance_from_center)
6849
{
69-
// 1. get dimensions of the input color (e.g. 4k)
70-
float2 resolution_out;
71-
tex.GetDimensions(resolution_out.x, resolution_out.y);
72-
73-
// 2. get dimensions of the velocity buffer (e.g. 1080p)
74-
float2 resolution_velocity;
75-
tex_velocity.GetDimensions(resolution_velocity.x, resolution_velocity.y);
76-
float2 velocity_texel_size = 1.0f / resolution_velocity;
77-
78-
// setup coordinates
79-
uint2 pixel_coord = thread_id.xy;
80-
float2 uv = (pixel_coord + 0.5f) / resolution_out;
81-
82-
// 3. get velocities using UVs to handle the resolution mismatch
83-
// center uses point sampling to get the exact velocity for this screen area
84-
float2 center_velocity = tex_velocity.SampleLevel(samplers[sampler_point_clamp], uv, 0).xy;
85-
float2 dilated_velocity = get_velocity_dilated(uv, velocity_texel_size);
86-
87-
// physically based motion blur strength
88-
float shutter_speed = pass_get_f3_value().x;
89-
float shutter_ratio = shutter_speed / (buffer_frame.delta_time + FLT_MIN);
90-
float2 center_velocity_uv = (center_velocity / 2.0f) * shutter_ratio;
91-
float2 dilated_velocity_uv = (dilated_velocity / 2.0f) * shutter_ratio;
92-
93-
// compute max velocity for tile
94-
if (group_index == 0)
95-
{
96-
g_tile_max_velocity_sqr = 0;
97-
}
98-
GroupMemoryBarrierWithGroupSync();
99-
100-
uint velocity_sqr = (uint)(dot(dilated_velocity_uv, dilated_velocity_uv) * 1000000.0f);
101-
InterlockedMax(g_tile_max_velocity_sqr, velocity_sqr);
102-
103-
GroupMemoryBarrierWithGroupSync();
104-
105-
// early exit if tile has insignificant motion
106-
if (sqrt(float(g_tile_max_velocity_sqr) / 1000000.0f) < g_velocity_threshold)
107-
{
108-
tex_uav[pixel_coord] = tex.Load(int3(pixel_coord, 0));
109-
return;
110-
}
50+
return saturate(1.0f - abs(distance_from_center - sample_blur_length) / (sample_blur_length + FLT_MIN));
51+
}
11152

112-
// classify pixel (background vs foreground)
113-
float center_speed = length(center_velocity_uv);
114-
float dilated_speed = length(dilated_velocity_uv);
115-
bool is_background = center_speed < (dilated_speed - g_velocity_threshold);
53+
// helper: velocity-to-pixel conversion accounting for resolution differences
// velocity_uv      - velocity expressed in uv units
// resolution_color - width and height, in pixels, of the buffer the result should be measured in
// returns the component-wise product, i.e. the same velocity expressed in pixels of that buffer
54+
float2 velocity_to_pixels(float2 velocity_uv, float2 resolution_color)
55+
{
56+
// velocity is in uv space, convert to pixels
57+
return velocity_uv * resolution_color;
58+
}
11659

117-
// early exit if we are static and there is no fast neighbor
118-
if (!is_background && center_speed < g_velocity_threshold)
60+
// reconstruction filter for plausible motion blur (based on mcguire 2012)
// gathers up to SAMPLE_COUNT taps on each side of the pixel along its own velocity direction and
// returns their weighted average; the alpha of the result is always the center sample's alpha
// uv                  - position of the pixel being shaded, used for every center fetch
// pixel_coord         - NOTE(review): not read anywhere in the body
// resolution_color    - converts uv-space velocity to pixels and pixel offsets back to uv
// resolution_velocity - NOTE(review): not read anywhere in the body
// shutter_ratio       - multiplier applied to every blur length
// noise               - remapped to a signed jitter; assumed to be in [0, 1] - TODO confirm
// returns the unmodified center color when the raw center velocity is shorter than 0.5 pixels
61+
float4 motion_blur_reconstruction(
62+
float2 uv,
63+
float2 pixel_coord,
64+
float2 resolution_color,
65+
float2 resolution_velocity,
66+
float shutter_ratio,
67+
float noise
68+
)
69+
{
70+
// sample center pixel
71+
float4 center_color = tex.SampleLevel(samplers[sampler_bilinear_clamp], uv, 0);
72+
float center_depth = get_linear_depth(uv);
73+
74+
// sample velocity at color resolution uv (handles resolution mismatch)
75+
float2 center_velocity_uv = tex_velocity.SampleLevel(samplers[sampler_bilinear_clamp], uv, 0).xy;
76+
float2 center_velocity_pixels = velocity_to_pixels(center_velocity_uv, resolution_color);
77+
float center_blur_length_raw = length(center_velocity_pixels);
78+
79+
// early exit for static or nearly static pixels (check raw velocity before shutter scaling)
80+
if (center_blur_length_raw < 0.5f)
11981
{
120-
tex_uav[pixel_coord] = tex.Load(int3(pixel_coord, 0));
121-
return;
82+
return center_color;
12283
}
123-
124-
// reconstruction loop setup
125-
float4 center_color = tex.Load(int3(pixel_coord, 0));
126-
float center_depth = get_linear_depth(uv);
127-
float4 accum_color = center_color;
128-
float total_weight = 1.0f;
129-
float2 search_vector = dilated_velocity_uv;
130-
float noise = get_noise(pixel_coord);
84+
85+
// apply shutter ratio after early exit check
86+
float center_blur_length = center_blur_length_raw * shutter_ratio;
87+
88+
// clamp blur length for performance (caps the tap spread at MAX_BLUR_RADIUS_PIXELS)
89+
float clamped_blur_length = min(center_blur_length, MAX_BLUR_RADIUS_PIXELS);
90+
91+
// normalized velocity direction (use raw velocity for direction, independent of shutter)
92+
float2 velocity_dir = center_velocity_pixels / (center_blur_length_raw + FLT_MIN);
93+
94+
// accumulation, seeded with the center sample
95+
float4 color_accum = center_color * CENTER_WEIGHT;
96+
float weight_accum = CENTER_WEIGHT;
97+
98+
// jittered sample offset for temporal stability (integrates with taa)
// remaps noise to [-1, 1], assuming noise is in [0, 1] - TODO confirm
99+
float jitter = (noise - 0.5f) * 2.0f;
100+
101+
// sample along velocity direction (both forward and backward)
131102
[unroll]
132-
for (uint i = 1; i < g_motion_blur_samples; ++i)
103+
for (int i = 1; i <= SAMPLE_COUNT; ++i)
133104
{
134-
float t = (float(i) + noise) / float(g_motion_blur_samples) - 0.5f;
135-
float2 sample_uv = uv + search_vector * t;
136-
137-
float is_on_screen = step(0.0f, sample_uv.x) * step(sample_uv.x, 1.0f) * step(0.0f, sample_uv.y) * step(sample_uv.y, 1.0f);
105+
// non-linear sample distribution - more samples near center for smooth gradients
106+
float t = (float)i / (float)SAMPLE_COUNT;
107+
t = t * t; // quadratic distribution concentrates samples near center
138108

139-
// bilinear sample for color (smoothness)
140-
float4 sample_color = tex.SampleLevel(samplers[sampler_bilinear_clamp], sample_uv, 0);
141-
float sample_depth = get_linear_depth(sample_uv);
142-
143-
// depth weighting logic
144-
float is_foreground_sample = step(sample_depth, center_depth - 0.01f);
145-
float depth_weight = 1.0f;
146-
147-
if (is_background)
109+
// add temporal jitter for smooth accumulation across frames
// the jitter is scaled by one linear step (clamped_blur_length / SAMPLE_COUNT);
// the max() below keeps the distance non-negative, so inner taps can collapse onto the center
110+
float sample_distance = t * clamped_blur_length + jitter * (clamped_blur_length / (float)SAMPLE_COUNT);
111+
sample_distance = max(sample_distance, 0.0f);
112+
113+
// sample positions in both directions (pixel distance converted back to uv)
114+
float2 offset = velocity_dir * sample_distance / resolution_color;
115+
float2 uv_forward = uv + offset;
116+
float2 uv_backward = uv - offset;
117+
118+
// forward sample (skipped entirely when is_valid_uv rejects the position)
119+
if (is_valid_uv(uv_forward))
148120
{
149-
depth_weight = is_foreground_sample;
121+
float4 sample_color = tex.SampleLevel(samplers[sampler_bilinear_clamp], uv_forward, 0);
122+
float sample_depth = get_linear_depth(uv_forward);
123+
float2 sample_velocity_uv = tex_velocity.SampleLevel(samplers[sampler_bilinear_clamp], uv_forward, 0).xy;
124+
float2 sample_velocity_px = velocity_to_pixels(sample_velocity_uv, resolution_color);
125+
float sample_blur_length = min(length(sample_velocity_px) * shutter_ratio, MAX_BLUR_RADIUS_PIXELS);
126+
127+
// bilateral weights based on depth relationship (guerrilla games approach)
128+
// foreground samples always contribute, background samples are occluded
// NOTE(review): which of the two weights means "in front" depends on the direction of get_linear_depth - confirm
129+
float depth_weight_forward = soft_depth_compare(center_depth, sample_depth); // 1 when center_depth <= sample_depth
130+
float depth_weight_backward = soft_depth_compare(sample_depth, center_depth); // 1 when sample_depth <= center_depth
131+
132+
// velocity-based weights (mcguire reconstruction filter)
133+
float cone = cone_weight(sample_distance, sample_blur_length);
134+
float cylinder = cylinder_weight(sample_blur_length, sample_distance);
135+
136+
// combine weights - sample contributes if it could affect this pixel
// the trailing saturate() scales the weight down when the sample's blur is shorter than the center's
137+
float weight = (depth_weight_forward * cone + depth_weight_backward * cylinder) *
138+
saturate(sample_blur_length / (clamped_blur_length + FLT_MIN));
139+
140+
// soft falloff at the edges for smoother appearance
// at i == SAMPLE_COUNT, t is 1 and this factor is 0, so the outermost tap adds no weight
141+
weight *= smoothstep(0.0f, 0.1f, 1.0f - t);
142+
143+
color_accum += sample_color * weight;
144+
weight_accum += weight;
150145
}
151-
else
146+
147+
// backward sample (mirror of the forward tap, same weighting)
148+
if (is_valid_uv(uv_backward))
152149
{
153-
float depth_diff = abs(center_depth - sample_depth);
154-
depth_weight = exp(-depth_diff);
150+
float4 sample_color = tex.SampleLevel(samplers[sampler_bilinear_clamp], uv_backward, 0);
151+
float sample_depth = get_linear_depth(uv_backward);
152+
float2 sample_velocity_uv = tex_velocity.SampleLevel(samplers[sampler_bilinear_clamp], uv_backward, 0).xy;
153+
float2 sample_velocity_px = velocity_to_pixels(sample_velocity_uv, resolution_color);
154+
float sample_blur_length = min(length(sample_velocity_px) * shutter_ratio, MAX_BLUR_RADIUS_PIXELS);
155+
156+
// bilateral weights (same depth comparisons as the forward tap)
157+
float depth_weight_forward = soft_depth_compare(center_depth, sample_depth);
158+
float depth_weight_backward = soft_depth_compare(sample_depth, center_depth);
159+
160+
// velocity weights
161+
float cone = cone_weight(sample_distance, sample_blur_length);
162+
float cylinder = cylinder_weight(sample_blur_length, sample_distance);
163+
164+
float weight = (depth_weight_forward * cone + depth_weight_backward * cylinder) *
165+
saturate(sample_blur_length / (clamped_blur_length + FLT_MIN));
166+
167+
weight *= smoothstep(0.0f, 0.1f, 1.0f - t);
168+
169+
color_accum += sample_color * weight;
170+
weight_accum += weight;
155171
}
156-
157-
float color_diff = length(center_color.rgb - sample_color.rgb);
158-
float color_weight = exp(-color_diff * g_color_scale);
159-
160-
float weight = is_on_screen * depth_weight * color_weight;
161-
accum_color += sample_color * weight;
162-
total_weight += weight;
163172
}
173+
174+
// normalize by the total weight (at least CENTER_WEIGHT, since the center sample is always included)
175+
float4 result = color_accum / (weight_accum + FLT_MIN);
176+
177+
// preserve alpha channel
178+
result.a = center_color.a;
179+
180+
return result;
181+
}
164182

165-
tex_uav[pixel_coord] = accum_color / total_weight;
183+
// compute shader entry point: each thread handles the output pixel at thread_id.xy and writes
// the motion-blurred color for it to tex_uav
// group_thread_id and group_index are declared but not read in the body
[numthreads(THREAD_GROUP_COUNT_X, THREAD_GROUP_COUNT_Y, 1)]
184+
void main_cs(uint3 thread_id : SV_DispatchThreadID, uint3 group_thread_id : SV_GroupThreadID, uint group_index : SV_GroupIndex)
185+
{
186+
// get buffer dimensions (color/output can be different from velocity)
187+
float2 resolution_color;
188+
tex.GetDimensions(resolution_color.x, resolution_color.y);
189+
190+
float2 resolution_output;
191+
tex_uav.GetDimensions(resolution_output.x, resolution_output.y);
192+
193+
float2 resolution_velocity;
194+
tex_velocity.GetDimensions(resolution_velocity.x, resolution_velocity.y);
195+
196+
// compute pixel coordinate and uv (uv is the pixel center, normalized by the output resolution)
197+
uint2 pixel_coord = thread_id.xy;
198+
float2 uv = (pixel_coord + 0.5f) / resolution_output;
199+
200+
// early exit for out of bounds (threads past the edge of the output texture write nothing)
201+
if (any(pixel_coord >= uint2(resolution_output)))
202+
{
203+
return;
204+
}
205+
206+
// get shutter parameters for physically-based blur
207+
// shutter_ratio represents how much of the frame's motion is captured
208+
// - ratio < 1: fast shutter, freezes motion (e.g. 1/125s at 60fps = ~0.48)
209+
// - ratio = 1: shutter matches frame time, standard blur
210+
// - ratio > 1: slow shutter, motion spans multiple frames (e.g. 1/30s at 60fps = 2.0)
// the ratio is clamped to [0, 3]; FLT_MIN keeps the division defined when delta_time is 0
211+
float shutter_speed = pass_get_f3_value().x;
212+
float shutter_ratio = clamp(shutter_speed / (buffer_frame.delta_time + FLT_MIN), 0.0f, 3.0f);
213+
214+
// generate per-pixel temporal noise for smooth sample distribution across frames
215+
// this integrates with taa for artifact-free motion blur
// noise_interleaved_gradient is defined outside this view; the meaning of its second argument is not visible here
216+
float noise = noise_interleaved_gradient(float2(pixel_coord), true);
217+
218+
// perform motion blur reconstruction
219+
float4 result = motion_blur_reconstruction(
220+
uv,
221+
float2(pixel_coord),
222+
resolution_color,
223+
resolution_velocity,
224+
shutter_ratio,
225+
noise
226+
);
227+
228+
tex_uav[pixel_coord] = result;
166229
}

0 commit comments

Comments
 (0)