Skip to content

Commit 962f9ef

Browse files
mvaligursky and Martin Valigursky authored
perf: use half-precision types in bloom downsample/upsample WGSL shaders (#8508)
Converts intermediate computations from f32 to half (f16) types in the bloom shader chain, reducing register pressure and enabling 2x ALU throughput on GPUs with native f16 support. Restructures upsample accumulation from accumulate-then-divide to multiply-before-accumulate to prevent f16 overflow.

Made-with: Cursor
Co-authored-by: Martin Valigursky <mvaligursky@snapchat.com>
1 parent ec6e1d0 commit 962f9ef

File tree

3 files changed

+38
-40
lines changed

3 files changed

+38
-40
lines changed

src/scene/shader-lib/glsl/chunks/render-pass/frag/upsample.js

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,9 @@ export default /* glsl */`
2020
vec3 h = texture2D (sourceTexture, vec2 (uv0.x, uv0.y - y)).rgb;
2121
vec3 i = texture2D (sourceTexture, vec2 (uv0.x + x, uv0.y - y)).rgb;
2222
23-
vec3 value = e * 4.0;
24-
value += (b + d + f + h) * 2.0;
25-
value += (a + c + g + i);
26-
value *= 1.0 / 16.0;
23+
vec3 value = e * 0.25;
24+
value += (b + d + f + h) * 0.125;
25+
value += (a + c + g + i) * 0.0625;
2726
2827
gl_FragColor = vec4(value, 1.0);
2928
}

src/scene/shader-lib/wgsl/chunks/render-pass/frag/downsample.js

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,47 +13,47 @@ varying uv0: vec2f;
1313
fn fragmentMain(input: FragmentInput) -> FragmentOutput {
1414
var output: FragmentOutput;
1515
16-
let e: vec3f = textureSample(sourceTexture, sourceTextureSampler, input.uv0).rgb;
16+
let e: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, input.uv0).rgb);
1717
1818
#ifdef BOXFILTER
19-
var value: vec3f = e;
19+
var value: half3 = e;
2020
2121
#ifdef PREMULTIPLY
22-
let premultiply: f32 = textureSample(premultiplyTexture, premultiplyTextureSampler, input.uv0).{PREMULTIPLY_SRC_CHANNEL};
23-
value = value * vec3f(premultiply);
22+
let premultiply: half = half(textureSample(premultiplyTexture, premultiplyTextureSampler, input.uv0).{PREMULTIPLY_SRC_CHANNEL});
23+
value *= premultiply;
2424
#endif
2525
#else
2626
2727
let x: f32 = uniform.sourceInvResolution.x;
2828
let y: f32 = uniform.sourceInvResolution.y;
2929
30-
let a: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - 2.0 * x, input.uv0.y + 2.0 * y)).rgb;
31-
let b: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y + 2.0 * y)).rgb;
32-
let c: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + 2.0 * x, input.uv0.y + 2.0 * y)).rgb;
30+
let a: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - 2.0 * x, input.uv0.y + 2.0 * y)).rgb);
31+
let b: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y + 2.0 * y)).rgb);
32+
let c: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + 2.0 * x, input.uv0.y + 2.0 * y)).rgb);
3333
34-
let d: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - 2.0 * x, input.uv0.y)).rgb;
35-
let f: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + 2.0 * x, input.uv0.y)).rgb;
34+
let d: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - 2.0 * x, input.uv0.y)).rgb);
35+
let f: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + 2.0 * x, input.uv0.y)).rgb);
3636
37-
let g: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - 2.0 * x, input.uv0.y - 2.0 * y)).rgb;
38-
let h: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y - 2.0 * y)).rgb;
39-
let i: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + 2.0 * x, input.uv0.y - 2.0 * y)).rgb;
37+
let g: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - 2.0 * x, input.uv0.y - 2.0 * y)).rgb);
38+
let h: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y - 2.0 * y)).rgb);
39+
let i: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + 2.0 * x, input.uv0.y - 2.0 * y)).rgb);
4040
41-
let j: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y + y)).rgb;
42-
let k: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y + y)).rgb;
43-
let l: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y - y)).rgb;
44-
let m: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y - y)).rgb;
41+
let j: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y + y)).rgb);
42+
let k: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y + y)).rgb);
43+
let l: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y - y)).rgb);
44+
let m: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y - y)).rgb);
4545
46-
var value: vec3f = e * 0.125;
47-
value = value + (a + c + g + i) * 0.03125;
48-
value = value + (b + d + f + h) * 0.0625;
49-
value = value + (j + k + l + m) * 0.125;
46+
var value: half3 = e * half(0.125);
47+
value += (a + c + g + i) * half(0.03125);
48+
value += (b + d + f + h) * half(0.0625);
49+
value += (j + k + l + m) * half(0.125);
5050
#endif
5151
5252
#ifdef REMOVE_INVALID
53-
value = max(value, vec3f(0.0));
53+
value = max(value, half3(0.0));
5454
#endif
5555
56-
output.color = vec4f(value, 1.0);
56+
output.color = vec4f(vec3f(value), 1.0);
5757
return output;
5858
}
5959
`;

src/scene/shader-lib/wgsl/chunks/render-pass/frag/upsample.js

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,23 @@ export default /* wgsl */`
1111
let x: f32 = uniform.sourceInvResolution.x;
1212
let y: f32 = uniform.sourceInvResolution.y;
1313
14-
let a: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y + y)).rgb;
15-
let b: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y + y)).rgb;
16-
let c: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y + y)).rgb;
14+
let a: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y + y)).rgb);
15+
let b: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y + y)).rgb);
16+
let c: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y + y)).rgb);
1717
18-
let d: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y)).rgb;
19-
let e: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y)).rgb;
20-
let f: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y)).rgb;
18+
let d: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y)).rgb);
19+
let e: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y)).rgb);
20+
let f: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y)).rgb);
2121
22-
let g: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y - y)).rgb;
23-
let h: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y - y)).rgb;
24-
let i: vec3f = textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y - y)).rgb;
22+
let g: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x - x, input.uv0.y - y)).rgb);
23+
let h: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x, input.uv0.y - y)).rgb);
24+
let i: half3 = half3(textureSample(sourceTexture, sourceTextureSampler, vec2f(input.uv0.x + x, input.uv0.y - y)).rgb);
2525
26-
var value: vec3f = e * 4.0;
27-
value = value + (b + d + f + h) * 2.0;
28-
value = value + (a + c + g + i);
29-
value = value * (1.0 / 16.0);
26+
var value: half3 = e * half(0.25);
27+
value += (b + d + f + h) * half(0.125);
28+
value += (a + c + g + i) * half(0.0625);
3029
31-
output.color = vec4f(value, 1.0);
30+
output.color = vec4f(vec3f(value), 1.0);
3231
return output;
3332
}
3433
`;

0 commit comments

Comments
 (0)