Skip to content

Commit e83dbaf

Browse files
committed
wasm size optimization
1 parent 82f3780 commit e83dbaf

File tree

7 files changed

+360
-53
lines changed

7 files changed

+360
-53
lines changed

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ repository = "https://github.com/wado-lang/fpfmt"
99
autotests = false
1010
exclude = ["tools/", "tests/"]
1111

12+
[features]
13+
small = []
14+
1215
[workspace]
1316
members = [".", "tools/pow10gen", "tools/bench", "tools/wasm-size"]
1417

README.md

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,28 +21,46 @@ Regenerate the power-of-10 table:
2121
cargo run -p pow10gen
2222
```
2323

24+
## Features
25+
26+
### `small` — compact tables for WASM
27+
28+
Enable the `small` feature to replace the 11 KB power-of-10 lookup table with
29+
two smaller tables (632 bytes total) that are multiplied at runtime.
30+
This reduces WASM binary size from 14 KB to **4 KB** with a modest
31+
formatting slowdown (~1.6x), while parsing is unaffected.
32+
Even with `small` enabled, formatting is still about **2.4x faster** than ryu.
33+
34+
```toml
35+
fpfmt = { version = "0.2", features = ["small"] }
36+
```
37+
2438
## Benchmarks
2539

2640
Formatting and parsing 8 representative f64 values (`1.0`, `0.1`, `3.14`, `PI`, `E`, `1e23`, `5e-324`, `1.7976931348623157e308`).
2741

2842
Measured on Apple M3 Pro, macOS 15.7.3 (aarch64):
2943

30-
| Task | fpfmt | ryu | stdlib |
31-
|------|------:|----:|-------:|
32-
| **format** (f64 → string) | 63 ns | 164 ns | 535 ns |
33-
| **parse** (string → f64) | 738 ns || 702 ns |
44+
| Task | fpfmt | fpfmt `small` | ryu | stdlib |
45+
|------|------:|--------------:|----:|-------:|
46+
| **format** (f64 → string) | 65 ns | 102 ns | 240 ns | 528 ns |
47+
| **parse** (string → f64) | 697 ns | 701 ns | | 658 ns |
3448

3549
```sh
3650
cargo bench -p bench
51+
cargo bench -p bench --features small
3752
```
3853

39-
## Wasm size
54+
## WASM size
4055

41-
32,970 bytes for `short` + `parse` as a cdylib (`wasm32-unknown-unknown`, `-Oz`).
56+
| Configuration | Size |
57+
|---------------|-----:|
58+
| default | 14,428 bytes |
59+
| `small` | **4,224 bytes** |
4260

4361
```sh
44-
RUSTFLAGS="-C opt-level=s" cargo build --target wasm32-unknown-unknown --release -p wasm-size
45-
RUSTFLAGS="-C opt-level=z" cargo build --target wasm32-unknown-unknown --release -p wasm-size
62+
cargo build --target wasm32-unknown-unknown --release -p wasm-size
63+
cargo build --target wasm32-unknown-unknown --release -p wasm-size --features small
4664
wc -c target/wasm32-unknown-unknown/release/wasm_size.wasm
4765
```
4866

src/lib.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
mod pow10tab;
1111

12+
#[cfg(feature = "small")]
13+
use pow10tab::{K, POW10_COARSE, POW10_FINE, Q_MIN};
14+
#[cfg(not(feature = "small"))]
1215
use pow10tab::{POW10_MIN, POW10_TAB};
1316

1417
/// `PmHiLo` represents `hi<<64 - lo`.
@@ -152,6 +155,7 @@ fn unmin(x: u64) -> Unrounded {
152155

153156
/// `prescale` returns the scaling constants for (e, p).
154157
/// `lp` must be `log2_pow10(p)`.
158+
#[cfg(not(feature = "small"))]
155159
#[inline]
156160
fn prescale(e: i32, p: i32, lp: i32) -> Scaler {
157161
Scaler {
@@ -160,6 +164,64 @@ fn prescale(e: i32, p: i32, lp: i32) -> Scaler {
160164
}
161165
}
162166

167+
/// `mul_pow10` rebuilds the 128-bit mantissa for `10**p` from the two compact
/// tables instead of reading it from a single large table.
///
/// `p` is split as `p = K * q + r` with `0 <= r < K`; the result is the
/// product of `POW10_COARSE[q - Q_MIN]` (the mantissa for `10**(K * q)`) and
/// `POW10_FINE[r]` (the mantissa for `10**r`), rounded up when inexact,
/// normalized so the top bit is set, and returned in `PmHiLo` form
/// (`hi<<64 - lo`).
///
/// Panics if `q - Q_MIN` falls outside `POW10_COARSE`.
#[cfg(feature = "small")]
#[inline]
fn mul_pow10(p: i32) -> PmHiLo {
    // Euclidean division keeps the remainder non-negative for negative `p`.
    let quot = p.div_euclid(K);
    let rem = p.rem_euclid(K) as usize;

    let coarse = POW10_COARSE[(quot - Q_MIN) as usize];
    let fine = POW10_FINE[rem];

    // Undo the `hi<<64 - lo` encoding to get the coarse mantissa as a plain
    // 128-bit integer, then split it into 64-bit halves for the multiply.
    let coarse_bits = (u128::from(coarse.hi) << 64).wrapping_sub(u128::from(coarse.lo));
    let coarse_hi = (coarse_bits >> 64) as u64;
    let coarse_lo = coarse_bits as u64;

    // Schoolbook 128x64 multiply: `upper * 2^64 + lower == coarse_bits * fine`.
    let upper = u128::from(coarse_hi) * u128::from(fine);
    let lower = u128::from(coarse_lo) * u128::from(fine);

    // Keep the top 128 bits of the 192-bit product. Any non-zero bits in the
    // discarded low word mean the product is inexact, so round up to follow
    // the table generator's convention.
    let inexact = lower as u64 != 0;
    let mut mant = upper + (lower >> 64) + u128::from(inexact);

    // Normalize: shift left by one when bit 127 is not already set.
    if mant >> 127 == 0 {
        mant <<= 1;
    }

    // Re-encode as `hi<<64 - lo`: a non-zero low word is stored negated and
    // borrows one from the high word.
    let hi = (mant >> 64) as u64;
    match mant as u64 {
        0 => PmHiLo { hi, lo: 0 },
        low => PmHiLo {
            hi: hi + 1,
            lo: low.wrapping_neg(),
        },
    }
}
213+
214+
/// `prescale` returns the scaling constants for (e, p).
/// `lp` must be `log2_pow10(p)`.
///
/// This is the `small`-feature variant: the power-of-10 mantissa `pm` is
/// computed by `mul_pow10(p)`, and the shift is `s = -(e + lp + 3)`.
#[cfg(feature = "small")]
#[inline]
fn prescale(e: i32, p: i32, lp: i32) -> Scaler {
    let pm = mul_pow10(p);
    let s = -(e + lp + 3);
    Scaler { pm, s }
}
224+
163225
/// `uscale` returns `unround(x * 2**e * 10**p)`.
164226
/// The caller should pass `c = prescale(e, p, log2_pow10(p))`
165227
/// and should have left-justified x so its high bit is set.
@@ -511,6 +573,7 @@ mod tests {
511573

512574
/// `TestPow10`: verify power-of-10 table entries.
513575
/// Port of Go's `TestPow10`.
576+
#[cfg(not(feature = "small"))]
514577
#[test]
515578
fn test_pow10() {
516579
let cases: [(i32, PmHiLo, i32); 4] = [

src/pow10tab.rs

Lines changed: 147 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44

55
use super::PmHiLo;
66

7+
#[cfg(not(feature = "small"))]
78
pub(crate) const POW10_MIN: i32 = -348;
8-
pub(crate) const POW10_MAX: i32 = 347;
99

10-
/// `pow10_tab` holds 128-bit mantissas of powers of 10.
11-
/// The values are scaled so the high bit is always set.
10+
#[cfg(not(feature = "small"))]
1211
pub(crate) static POW10_TAB: [PmHiLo; 696] = [
1312
PmHiLo {
1413
hi: 0xfa8fd5a0081c0289,
@@ -2795,3 +2794,148 @@ pub(crate) static POW10_TAB: [PmHiLo; 696] = [
27952794
lo: 0xb48e6a0d2d2e5604,
27962795
}, // 1e347 * 2**-1025
27972796
];
2797+
2798+
/// Step between consecutive `POW10_COARSE` entries, in powers of ten, and the
/// number of entries in `POW10_FINE`. `mul_pow10` splits its exponent with
/// `div_euclid(K)` / `rem_euclid(K)`.
#[cfg(feature = "small")]
2799+
pub(crate) const K: i32 = 27;
2800+
2801+
/// Quotient (`p.div_euclid(K)`) that corresponds to index 0 of
/// `POW10_COARSE`; `mul_pow10` subtracts it to form the table index.
/// The first coarse entry is annotated `1e-351`, i.e. `10**(K * Q_MIN)`.
#[cfg(feature = "small")]
2802+
pub(crate) const Q_MIN: i32 = -13;
2803+
2804+
/// Coarse power-of-10 table used with the `small` feature.
///
/// Holds one `PmHiLo` (`hi<<64 - lo`) per `K`-th power of ten: entry `i` is
/// annotated with `1e(K * (i + Q_MIN))` and the power of two it is scaled by,
/// covering `1e-351` through `1e324`. `mul_pow10` indexes it with
/// `p.div_euclid(K) - Q_MIN`.
#[cfg(feature = "small")]
2805+
pub(crate) static POW10_COARSE: [PmHiLo; 26] = [
2806+
PmHiLo {
2807+
hi: 0x8049a4ac0c5811af,
2808+
lo: 0xdfa4769288829d87,
2809+
}, // 1e-351 * 2**1293
2810+
PmHiLo {
2811+
hi: 0xcf42894a5dce35eb,
2812+
lo: 0xadf9b3537d798a46,
2813+
}, // 1e-324 * 2**1204
2814+
PmHiLo {
2815+
hi: 0xa76c582338ed2622,
2816+
lo: 0x50d50d47f5090db1,
2817+
}, // 1e-297 * 2**1114
2818+
PmHiLo {
2819+
hi: 0x873e4f75e2224e69,
2820+
lo: 0xa588bb5917fb5d6e,
2821+
}, // 1e-270 * 2**1024
2822+
PmHiLo {
2823+
hi: 0xda7f5bf590966849,
2824+
lo: 0x50c65b8aaf957661,
2825+
}, // 1e-243 * 2**935
2826+
PmHiLo {
2827+
hi: 0xb080392cc4349ded,
2828+
lo: 0x427286b26955304c,
2829+
}, // 1e-216 * 2**845
2830+
PmHiLo {
2831+
hi: 0x8e938662882af53f,
2832+
lo: 0xab814b848d7d1163,
2833+
}, // 1e-189 * 2**755
2834+
PmHiLo {
2835+
hi: 0xe65829b3046b0afb,
2836+
lo: 0xf34b5a5ceed5aeed,
2837+
}, // 1e-162 * 2**666
2838+
PmHiLo {
2839+
hi: 0xba121a4650e4ddec,
2840+
lo: 0x6d0cb29d9e931bec,
2841+
}, // 1e-135 * 2**576
2842+
PmHiLo {
2843+
hi: 0x964e858c91ba2656,
2844+
lo: 0xc595f8072aef0790,
2845+
}, // 1e-108 * 2**486
2846+
PmHiLo {
2847+
hi: 0xf2d56790ab41c2a3,
2848+
lo: 0x051d8d66bdc0463c,
2849+
}, // 1e-81 * 2**397
2850+
PmHiLo {
2851+
hi: 0xc428d05aa4751e4d,
2852+
lo: 0x55681eb3c3d94779,
2853+
}, // 1e-54 * 2**307
2854+
PmHiLo {
2855+
hi: 0x9e74d1b791e07e49,
2856+
lo: 0x88a15d9b30aacb82,
2857+
}, // 1e-27 * 2**217
2858+
PmHiLo {
2859+
hi: 0x8000000000000000,
2860+
lo: 0x0000000000000000,
2861+
}, // 1e0 * 2**127
2862+
PmHiLo {
2863+
hi: 0xcecb8f27f4200f3a,
2864+
lo: 0x0000000000000000,
2865+
}, // 1e27 * 2**38
2866+
PmHiLo {
2867+
hi: 0xa70c3c40a64e6c52,
2868+
lo: 0x666f6f49a09826dc,
2869+
}, // 1e54 * 2**-52
2870+
PmHiLo {
2871+
hi: 0x86f0ac99b4e8dafe,
2872+
lo: 0x965fd744c2128e5c,
2873+
}, // 1e81 * 2**-142
2874+
PmHiLo {
2875+
hi: 0xda01ee641a708dea,
2876+
lo: 0x17f190b7df336b6a,
2877+
}, // 1e108 * 2**-231
2878+
PmHiLo {
2879+
hi: 0xb01ae745b101e9e5,
2880+
lo: 0xa13fa23008d18070,
2881+
}, // 1e135 * 2**-321
2882+
PmHiLo {
2883+
hi: 0x8e41ade9fbebc27e,
2884+
lo: 0xeba770ec417b8cf8,
2885+
}, // 1e162 * 2**-411
2886+
PmHiLo {
2887+
hi: 0xe5d3ef282a242e82,
2888+
lo: 0x70e9973757925a05,
2889+
}, // 1e189 * 2**-500
2890+
PmHiLo {
2891+
hi: 0xb9a74a0637ce2ee2,
2892+
lo: 0x926ac1d428e8c96d,
2893+
}, // 1e216 * 2**-590
2894+
PmHiLo {
2895+
hi: 0x95f83d0a1fb69cda,
2896+
lo: 0xb54250efea9b0671,
2897+
}, // 1e243 * 2**-680
2898+
PmHiLo {
2899+
hi: 0xf24a01a73cf2dcd0,
2900+
lo: 0x439cc4c698c37313,
2901+
}, // 1e270 * 2**-769
2902+
PmHiLo {
2903+
hi: 0xc3b8358109e84f08,
2904+
lo: 0xf579d07f13b8ff37,
2905+
}, // 1e297 * 2**-859
2906+
PmHiLo {
2907+
hi: 0x9e19db92b4e31baa,
2908+
lo: 0x93f85d3d957cb92e,
2909+
}, // 1e324 * 2**-949
2910+
];
2911+
2912+
/// Fine power-of-10 table used with the `small` feature.
///
/// Entry `r` (for `r` in `0..K`) is for `10**r`. Each `u64` is the high
/// 64 bits of the 128-bit scaled value named in its trailing comment (for
/// example entry 0 is `2**127 >> 64`). `mul_pow10` indexes it with
/// `p.rem_euclid(K)` and multiplies it into the matching `POW10_COARSE` entry.
#[cfg(feature = "small")]
2913+
pub(crate) static POW10_FINE: [u64; 27] = [
2914+
0x8000000000000000, // 1e0 * 2**127
2915+
0xa000000000000000, // 1e1 * 2**124
2916+
0xc800000000000000, // 1e2 * 2**121
2917+
0xfa00000000000000, // 1e3 * 2**118
2918+
0x9c40000000000000, // 1e4 * 2**114
2919+
0xc350000000000000, // 1e5 * 2**111
2920+
0xf424000000000000, // 1e6 * 2**108
2921+
0x9896800000000000, // 1e7 * 2**104
2922+
0xbebc200000000000, // 1e8 * 2**101
2923+
0xee6b280000000000, // 1e9 * 2**98
2924+
0x9502f90000000000, // 1e10 * 2**94
2925+
0xba43b74000000000, // 1e11 * 2**91
2926+
0xe8d4a51000000000, // 1e12 * 2**88
2927+
0x9184e72a00000000, // 1e13 * 2**84
2928+
0xb5e620f480000000, // 1e14 * 2**81
2929+
0xe35fa931a0000000, // 1e15 * 2**78
2930+
0x8e1bc9bf04000000, // 1e16 * 2**74
2931+
0xb1a2bc2ec5000000, // 1e17 * 2**71
2932+
0xde0b6b3a76400000, // 1e18 * 2**68
2933+
0x8ac7230489e80000, // 1e19 * 2**64
2934+
0xad78ebc5ac620000, // 1e20 * 2**61
2935+
0xd8d726b7177a8000, // 1e21 * 2**58
2936+
0x878678326eac9000, // 1e22 * 2**54
2937+
0xa968163f0a57b400, // 1e23 * 2**51
2938+
0xd3c21bcecceda100, // 1e24 * 2**48
2939+
0x84595161401484a0, // 1e25 * 2**44
2940+
0xa56fa5b99019a5c8, // 1e26 * 2**41
2941+
];

tools/bench/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ version = "0.0.0"
44
edition = "2024"
55
publish = false
66

7+
[features]
8+
small = ["fpfmt/small"]
9+
710
[dependencies]
811
fpfmt = { path = "../.." }
912
ryu = "1"

0 commit comments

Comments
 (0)