Skip to content

Commit 7ee986f

Browse files
mvaligursky and Martin Valigursky authored
Workaround for packHalf2x16 precision issues on some Android devices (#8121)
* Workaround for packHalf2x16 precision issues on some Android devices * tabs --------- Co-authored-by: Martin Valigursky <[email protected]>
1 parent 8d8f015 commit 7ee986f

File tree

9 files changed

+219
-22
lines changed

9 files changed

+219
-22
lines changed

src/platform/graphics/graphics-device.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,12 @@ class GraphicsDevice extends EventHandler {
545545
if (this.textureFloatFilterable) capsDefines.set('CAPS_TEXTURE_FLOAT_FILTERABLE', '');
546546
if (this.textureFloatRenderable) capsDefines.set('CAPS_TEXTURE_FLOAT_RENDERABLE', '');
547547
if (this.supportsMultiDraw) capsDefines.set('CAPS_MULTI_DRAW', '');
548+
549+
// Platform defines
550+
if (platform.desktop) capsDefines.set('PLATFORM_DESKTOP', '');
551+
if (platform.mobile) capsDefines.set('PLATFORM_MOBILE', '');
552+
if (platform.android) capsDefines.set('PLATFORM_ANDROID', '');
553+
if (platform.ios) capsDefines.set('PLATFORM_IOS', '');
548554
}
549555

550556
/**

src/scene/shader-lib/glsl/chunks/gsplat/frag/gsplatCopyToWorkbuffer.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export default /* glsl */`
88
#include "gsplatEvalSHVS"
99
#include "gsplatQuatToMat3VS"
1010
#include "gsplatSourceFormatVS"
11+
#include "packHalfPS"
1112
1213
uniform mat4 uTransform;
1314
@@ -120,8 +121,8 @@ void main(void) {
120121
pcFragColor0 = color;
121122
#endif
122123
#ifndef GSPLAT_COLOR_ONLY
123-
pcFragColor1 = uvec4(floatBitsToUint(modelCenter.x), floatBitsToUint(modelCenter.y), floatBitsToUint(modelCenter.z), packHalf2x16(vec2(covA.z, covB.z)));
124-
pcFragColor2 = uvec2(packHalf2x16(covA.xy), packHalf2x16(covB.xy));
124+
pcFragColor1 = uvec4(floatBitsToUint(modelCenter.x), floatBitsToUint(modelCenter.y), floatBitsToUint(modelCenter.z), packHalf2x16Safe(vec2(covA.z, covB.z)));
125+
pcFragColor2 = uvec2(packHalf2x16Safe(covA.xy), packHalf2x16Safe(covB.xy));
125126
#endif
126127
}
127128
}

src/scene/shader-lib/glsl/chunks/gsplat/vert/gsplatCompressedData.js

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
export default /* glsl */`
2+
#include "gsplatPackingPS"
3+
24
uniform highp usampler2D packedTexture;
35
uniform highp sampler2D chunkTexture;
46
@@ -18,15 +20,6 @@ vec3 unpack111011(uint bits) {
1820
);
1921
}
2022
21-
vec4 unpack8888(uint bits) {
22-
return vec4(
23-
float(bits >> 24u) / 255.0,
24-
float((bits >> 16u) & 0xffu) / 255.0,
25-
float((bits >> 8u) & 0xffu) / 255.0,
26-
float(bits & 0xffu) / 255.0
27-
);
28-
}
29-
3023
const float norm = sqrt(2.0);
3124
3225
vec4 unpackRotation(uint bits) {
src/scene/shader-lib/glsl/chunks/internal/frag/packHalf.js

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Generic half-float packing with software fallback for subnormals
2+
// Addresses vendor differences in packHalf2x16 subnormal handling (e.g., Adreno (TM) 750 on Samsung Galaxy S24)
3+
export default /* glsl */`
4+
5+
#if defined(PLATFORM_ANDROID)
6+
7+
// Software conversion of one 32-bit float to a 16-bit half float.
// Rounds to nearest with ties-to-even and produces half subnormals instead of flushing them.
//   a       - value to convert
//   returns - half-float bit pattern (1 sign, 5 exponent, 10 mantissa bits) in the low 16 bits
uint floatToHalf(float a) {
    uint u = floatBitsToUint(a);
    uint sign = (u >> 16u) & 0x8000u;   // float sign bit moved to the half sign position
    uint man = u & 0x007FFFFFu;         // 23-bit float mantissa
    uint bexp = (u >> 23u) & 0xFFu;     // biased float exponent
    int e32 = int(bexp) - 127;          // unbiased exponent

    // NaN / Inf: exponent field is all ones
    if (bexp == 0xFFu) {
        return sign | ((man != 0u) ? 0x7E00u : 0x7C00u);
    }

    // Too large for half: overflow to Inf
    if (e32 > 15) return sign | 0x7C00u;

    // Normal half
    if (e32 >= -14) {
        uint he = uint(e32 + 15);
        uint hm = man >> 13u;
        uint rem = man & 0x1FFFu;       // the 13 bits that get dropped
        bool roundUp = rem > 0x1000u || (rem == 0x1000u && (hm & 1u) == 1u);
        if (roundUp) hm = hm + 1u;

        // Rounding can carry out of the 10-bit mantissa. The carry must be tested BEFORE any
        // masking, otherwise the exponent is never bumped and the result is half the right value.
        if (hm == 0x400u) {
            hm = 0u;
            he = he + 1u;
            if (he >= 31u) return sign | 0x7C00u;
        }
        return sign | (he << 10u) | hm;
    }

    // Subnormal half. e32 == -25 is included so that values in (2^-25, 2^-24) round up to the
    // smallest subnormal; exactly 2^-25 is a tie and rounds to (even) zero.
    if (e32 >= -25) {
        uint s = uint(-(e32 + 1));              // shift aligning the value to units of 2^-24
        uint mnorm = 0x00800000u | man;         // mantissa with the implicit leading one
        uint hm = mnorm >> s;
        uint rem = mnorm & ((1u << s) - 1u);    // bits shifted out
        uint halfBt = 1u << (s - 1u);           // half of one output unit
        bool roundUp = rem > halfBt || (rem == halfBt && (hm & 1u) == 1u);
        if (roundUp) hm = hm + 1u;

        // Rounding up to 0x400 yields the smallest normal half
        if (hm >= 0x400u) return sign | (1u << 10u);
        return sign | hm;
    }

    // Underflow to signed zero
    return sign;
}
55+
56+
// Packs two floats into a single uint as a pair of half floats:
// v.x goes to the low 16 bits, v.y to the high 16 bits.
//
// The hardware packHalf2x16 is used whenever both components are at least 2^-14 in magnitude
// (the smallest normal half). If either component is smaller - i.e. it would become a half
// subnormal or zero - both components are converted by the software floatToHalf routine
// instead, because this chunk exists to work around builtins that mishandle that range.
uint packHalf2x16Safe(vec2 v) {
    // Biased float32 exponent fields (bits 23..30) of each component
    uint expX = (floatBitsToUint(v.x) >> 23u) & 0xFFu;
    uint expY = (floatBitsToUint(v.y) >> 23u) & 0xFFu;

    // 113 = 127 - 14 is the biased exponent of 2^-14; anything below it is outside
    // the normal half range.
    bool bothInNormalRange = expX >= 113u && expY >= 113u;
    if (bothInNormalRange) {
        // Fast path: hardware builtin
        return packHalf2x16(v);
    }

    // Slow path: reference software conversion of both components
    uint lo = floatToHalf(v.x);
    uint hi = floatToHalf(v.y);
    return (hi << 16u) | lo;
}
91+
92+
#else
93+
94+
// Variant compiled when PLATFORM_ANDROID is not defined: no workaround is applied and the
// call simply forwards to the packHalf2x16 builtin.
uint packHalf2x16Safe(vec2 v) {
    uint packed = packHalf2x16(v);
    return packed;
}
98+
99+
#endif
100+
`;

src/scene/shader-lib/glsl/collections/shader-chunks-glsl.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ import normalMapPS from '../chunks/standard/frag/normalMap.js';
8989
import opacityPS from '../chunks/standard/frag/opacity.js';
9090
import opacityDitherPS from '../chunks/standard/frag/opacity-dither.js';
9191
import outputPS from '../chunks/lit/frag/output.js';
92+
import packHalfPS from '../chunks/internal/frag/packHalf.js';
9293
import outputAlphaPS from '../chunks/lit/frag/outputAlpha.js';
9394
import outputTex2DPS from '../chunks/common/frag/outputTex2D.js';
9495
import sheenPS from '../chunks/standard/frag/sheen.js';
@@ -260,6 +261,7 @@ const shaderChunksGLSL = {
260261
opacityDitherPS,
261262
outputPS,
262263
outputAlphaPS,
264+
packHalfPS,
263265
outputTex2DPS,
264266
sheenPS,
265267
sheenGlossPS,

src/scene/shader-lib/wgsl/chunks/gsplat/frag/gsplatCopyToWorkbuffer.js

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ export default /* wgsl */`
88
#include "gsplatEvalSHVS"
99
#include "gsplatQuatToMat3VS"
1010
#include "gsplatSourceFormatVS"
11+
#include "packHalfPS"
1112
1213
uniform uTransform: mat4x4f;
1314
@@ -110,8 +111,8 @@ fn fragmentMain(input: FragmentInput) -> FragmentOutput {
110111
// write out results
111112
output.color = color;
112113
#ifndef GSPLAT_COLOR_ONLY
113-
output.color1 = vec4u(bitcast<u32>(modelCenter.x), bitcast<u32>(modelCenter.y), bitcast<u32>(modelCenter.z), pack2x16float(vec2f(covA.z, covB.z)));
114-
output.color2 = vec2u(pack2x16float(covA.xy), pack2x16float(covB.xy));
114+
output.color1 = vec4u(bitcast<u32>(modelCenter.x), bitcast<u32>(modelCenter.y), bitcast<u32>(modelCenter.z), pack2x16floatSafe(vec2f(covA.z, covB.z)));
115+
output.color2 = vec2u(pack2x16floatSafe(covA.xy), pack2x16floatSafe(covB.xy));
115116
#endif
116117
}
117118

src/scene/shader-lib/wgsl/chunks/gsplat/vert/gsplatCompressedData.js

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
export default /* wgsl */`
2+
#include "gsplatPackingPS"
3+
24
var packedTexture: texture_2d<u32>;
35
var chunkTexture: texture_2d<uff>;
46
@@ -14,15 +16,6 @@ fn unpack111011(bits: u32) -> vec3f {
1416
return (vec3f((vec3<u32>(bits) >> vec3<u32>(21u, 11u, 0u)) & vec3<u32>(0x7ffu, 0x3ffu, 0x7ffu))) / vec3f(2047.0, 1023.0, 2047.0);
1517
}
1618
17-
fn unpack8888(bits: u32) -> vec4f {
18-
return vec4f(
19-
f32((bits >> 24u) & 0xffu),
20-
f32((bits >> 16u) & 0xffu),
21-
f32((bits >> 8u) & 0xffu),
22-
f32(bits & 0xffu)
23-
) / 255.0;
24-
}
25-
2619
const norm_const: f32 = sqrt(2.0);
2720
2821
fn unpackRotation(bits: u32) -> vec4f {
src/scene/shader-lib/wgsl/chunks/internal/frag/packHalf.js

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Generic half-float packing with software fallback for subnormals
2+
// Addresses vendor differences in pack2x16float subnormal handling (e.g., Adreno (TM) 750 on Samsung Galaxy S24)
3+
export default /* wgsl */`
4+
5+
#if defined(PLATFORM_ANDROID)
6+
7+
// Software conversion of one f32 to a 16-bit half float.
// Rounds to nearest with ties-to-even and produces half subnormals instead of flushing them.
//   a       - value to convert
//   returns - half-float bit pattern (1 sign, 5 exponent, 10 mantissa bits) in the low 16 bits
fn floatToHalf(a: f32) -> u32 {
    let u: u32 = bitcast<u32>(a);
    let sign: u32 = (u >> 16u) & 0x8000u;   // f32 sign bit moved to the half sign position
    let man: u32 = u & 0x007FFFFFu;         // 23-bit f32 mantissa
    let bexp: u32 = (u >> 23u) & 0xFFu;     // biased f32 exponent
    let e32: i32 = i32(bexp) - 127;         // unbiased exponent

    // NaN / Inf: exponent field is all ones
    if (bexp == 0xFFu) {
        return sign | select(0x7C00u, 0x7E00u, man != 0u);
    }

    // Too large for half: overflow to Inf
    if (e32 > 15) { return sign | 0x7C00u; }

    // Normal half
    if (e32 >= -14) {
        var he: u32 = u32(e32 + 15);
        var hm: u32 = man >> 13u;
        let rem: u32 = man & 0x1FFFu;       // the 13 bits that get dropped
        let roundUp: bool = (rem > 0x1000u) || (rem == 0x1000u && (hm & 1u) == 1u);
        hm = hm + select(0u, 1u, roundUp);

        // Rounding can carry out of the 10-bit mantissa. The carry must be tested BEFORE any
        // masking, otherwise the exponent is never bumped and the result is half the right value.
        if (hm == 0x400u) {
            hm = 0u;
            he = he + 1u;
            if (he >= 31u) { return sign | 0x7C00u; }
        }
        return sign | (he << 10u) | hm;
    }

    // Subnormal half. e32 == -25 is included so that values in (2^-25, 2^-24) round up to the
    // smallest subnormal; exactly 2^-25 is a tie and rounds to (even) zero.
    if (e32 >= -25) {
        let s: u32 = u32(-(e32 + 1));               // shift aligning the value to units of 2^-24
        let mnorm: u32 = 0x00800000u | man;         // mantissa with the implicit leading one
        var hm: u32 = mnorm >> s;
        let rem: u32 = mnorm & ((1u << s) - 1u);    // bits shifted out
        let halfBt: u32 = 1u << (s - 1u);           // half of one output unit
        let roundUp: bool = (rem > halfBt) || (rem == halfBt && (hm & 1u) == 1u);
        hm = hm + select(0u, 1u, roundUp);

        // Rounding up to 0x400 yields the smallest normal half
        if (hm >= 0x400u) { return sign | (1u << 10u); }
        return sign | hm;
    }

    // Underflow to signed zero
    return sign;
}
54+
55+
// Packs two f32 values into a single u32 as a pair of half floats:
// v.x goes to the low 16 bits, v.y to the high 16 bits.
//
// The hardware pack2x16float is used whenever both components are at least 2^-14 in magnitude
// (the smallest normal half). If either component is smaller - i.e. it would become a half
// subnormal or zero - both components are converted by the software floatToHalf routine
// instead, because this chunk exists to work around builtins that mishandle that range.
fn pack2x16floatSafe(v: vec2f) -> u32 {
    // Biased f32 exponent fields (bits 23..30) of each component
    let expX: u32 = (bitcast<u32>(v.x) >> 23u) & 0xFFu;
    let expY: u32 = (bitcast<u32>(v.y) >> 23u) & 0xFFu;

    // 113 = 127 - 14 is the biased exponent of 2^-14; anything below it is outside
    // the normal half range.
    let bothInNormalRange: bool = (expX >= 113u) && (expY >= 113u);
    if (bothInNormalRange) {
        // Fast path: hardware builtin
        return pack2x16float(v);
    }

    // Slow path: reference software conversion of both components
    let lo: u32 = floatToHalf(v.x);
    let hi: u32 = floatToHalf(v.y);
    return (hi << 16u) | lo;
}
90+
91+
#else
92+
93+
// Variant compiled when PLATFORM_ANDROID is not defined: no workaround is applied and the
// call simply forwards to the pack2x16float builtin.
fn pack2x16floatSafe(v: vec2f) -> u32 {
    let packed: u32 = pack2x16float(v);
    return packed;
}
97+
98+
#endif
99+
`;

src/scene/shader-lib/wgsl/collections/shader-chunks-wgsl.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ import opacityPS from '../chunks/standard/frag/opacity.js';
8989
import opacityDitherPS from '../chunks/standard/frag/opacity-dither.js';
9090
import outputPS from '../chunks/lit/frag/output.js';
9191
import outputAlphaPS from '../chunks/lit/frag/outputAlpha.js';
92+
import packHalfPS from '../chunks/internal/frag/packHalf.js';
9293
import outputTex2DPS from '../chunks/common/frag/outputTex2D.js';
9394
import sheenPS from '../chunks/standard/frag/sheen.js';
9495
import sheenGlossPS from '../chunks/standard/frag/sheenGloss.js';
@@ -258,6 +259,7 @@ const shaderChunksWGSL = {
258259
opacityDitherPS,
259260
outputPS,
260261
outputAlphaPS,
262+
packHalfPS,
261263
outputTex2DPS,
262264
sheenPS,
263265
sheenGlossPS,

0 commit comments

Comments
 (0)