Skip to content

Commit 4225011

Browse files
gfx and claude committed
Add #[inline] to all hot-path functions for cross-crate optimization
Without #[inline], rustc generally does not inline non-generic functions across crate boundaries (unless LTO is enabled), so calls from downstream crates remain opaque, out-of-line calls. This prevented constant propagation, register allocation across call boundaries, and further compiler optimizations for downstream users.

Benchmark results (Apple M3 Pro):
- format: 102 ns → 63 ns (-38%)
- parse: 805 ns → 738 ns (-8%)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b888758 commit 4225011

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

src/lib.rs

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,42 +46,52 @@ fn unround(x: f64) -> Unrounded {
4646

4747
#[allow(dead_code, clippy::many_single_char_names)]
4848
impl Unrounded {
49+
#[inline]
4950
fn floor(self) -> u64 {
5051
self.0 >> 2
5152
}
53+
#[inline]
5254
fn round_half_down(self) -> u64 {
5355
(self.0 + 1) >> 2
5456
}
57+
#[inline]
5558
fn round(self) -> u64 {
5659
(self.0 + 1 + ((self.0 >> 2) & 1)) >> 2
5760
}
61+
#[inline]
5862
fn round_half_up(self) -> u64 {
5963
(self.0 + 2) >> 2
6064
}
65+
#[inline]
6166
fn ceil(self) -> u64 {
6267
(self.0 + 3) >> 2
6368
}
69+
#[inline]
6470
fn nudge(self, delta: i32) -> Unrounded {
6571
Unrounded(self.0.wrapping_add(delta as u64))
6672
}
6773

74+
#[inline]
6875
fn div(self, d: u64) -> Unrounded {
6976
let x = self.0;
7077
Unrounded((x / d) | (self.0 & 1) | u64::from(!x.is_multiple_of(d)))
7178
}
7279

80+
#[inline]
7381
fn rsh(self, s: u32) -> Unrounded {
7482
Unrounded((self.0 >> s) | (self.0 & 1) | u64::from(self.0 & ((1u64 << s) - 1) != 0))
7583
}
7684
}
7785

7886
/// `log10_pow2(x)` returns `floor(log10(2**x))` = `floor(x * log10(2))`.
87+
#[inline]
7988
fn log10_pow2(x: i32) -> i32 {
8089
// log10(2) ~ 0.30102999566 ~ 78913 / 2^18
8190
(x * 78913) >> 18
8291
}
8392

8493
/// `log2_pow10(x)` returns `floor(log2(10**x))` = `floor(x * log2(10))`.
94+
#[inline]
8595
fn log2_pow10(x: i32) -> i32 {
8696
// log2(10) ~ 3.32192809489 ~ 108853 / 2^15
8797
(x * 108_853) >> 15
@@ -114,6 +124,7 @@ const UINT64_POW10: [u64; 20] = [
114124
/// `unpack64` returns (m, e) such that `f = m * 2**e`.
115125
/// The caller is expected to have handled 0, NaN, and +/-Inf already.
116126
/// To unpack an `f32`, use `unpack64(f as f64)`.
127+
#[inline]
117128
#[allow(clippy::many_single_char_names)]
118129
fn unpack64(f: f64) -> (u64, i32) {
119130
const SHIFT: u32 = 64 - 53; // 11
@@ -133,6 +144,7 @@ fn unpack64(f: f64) -> (u64, i32) {
133144
/// `pack64` takes (m, e) and returns `f = m * 2**e`.
134145
/// It assumes the caller has provided a 53-bit mantissa m
135146
/// and an exponent that is in range for the mantissa.
147+
#[inline]
136148
fn pack64(m: u64, e: i32) -> f64 {
137149
if m & (1u64 << 52) == 0 {
138150
return f64::from_bits(m);
@@ -141,12 +153,14 @@ fn pack64(m: u64, e: i32) -> f64 {
141153
}
142154

143155
/// `unmin` returns the minimum unrounded that rounds to x.
156+
#[inline]
144157
fn unmin(x: u64) -> Unrounded {
145158
Unrounded((x << 2) - 2)
146159
}
147160

148161
/// `prescale` returns the scaling constants for (e, p).
149162
/// `lp` must be `log2_pow10(p)`.
163+
#[inline]
150164
fn prescale(e: i32, p: i32, lp: i32) -> Scaler {
151165
Scaler {
152166
pm: POW10_TAB[(p - POW10_MIN) as usize],
@@ -157,6 +171,7 @@ fn prescale(e: i32, p: i32, lp: i32) -> Scaler {
157171
/// `uscale` returns `unround(x * 2**e * 10**p)`.
158172
/// The caller should pass `c = prescale(e, p, log2_pow10(p))`
159173
/// and should have left-justified x so its high bit is set.
174+
#[inline]
160175
fn uscale(x: u64, c: Scaler) -> Unrounded {
161176
let r = u128::from(x) * u128::from(c.pm.hi);
162177
let mut hi = (r >> 64) as u64;
@@ -178,6 +193,7 @@ fn uscale(x: u64, c: Scaler) -> Unrounded {
178193
///
179194
/// Panics if `n > 18`.
180195
#[must_use]
196+
#[inline]
181197
#[allow(clippy::many_single_char_names)]
182198
pub fn fixed_width(f: f64, n: i32) -> (u64, i32) {
183199
assert!(n <= 18, "too many digits");
@@ -200,6 +216,7 @@ pub fn fixed_width(f: f64, n: i32) -> (u64, i32) {
200216
///
201217
/// Panics if `d > 10_000_000_000_000_000_000` (more than 19 digits).
202218
#[must_use]
219+
#[inline]
203220
#[allow(clippy::many_single_char_names)]
204221
pub fn parse(d: u64, p: i32) -> f64 {
205222
assert!(d <= 10_000_000_000_000_000_000, "too many digits");
@@ -223,6 +240,7 @@ pub fn parse(d: u64, p: i32) -> f64 {
223240
/// Parses a decimal string and returns the nearest f64.
224241
/// Returns `None` if the input is malformed.
225242
#[must_use]
243+
#[inline]
226244
pub fn parse_text(s: &[u8]) -> Option<f64> {
227245
fn is_digit(c: u8) -> bool {
228246
c.wrapping_sub(b'0') <= 9
@@ -290,6 +308,7 @@ pub fn parse_text(s: &[u8]) -> Option<f64> {
290308
/// using as few digits as possible that will still round trip
291309
/// back to the original f64.
292310
#[must_use]
311+
#[inline]
293312
#[allow(clippy::many_single_char_names)]
294313
pub fn short(f: f64) -> (u64, i32) {
295314
const MIN_EXP: i32 = -1085;
@@ -330,13 +349,15 @@ pub fn short(f: f64) -> (u64, i32) {
330349

331350
/// Computes the skewed footprint of `m * 2**e`,
332351
/// which is `floor(log10(3/4 * 2**e))` = `floor(e*log10(2) - log10(4/3))`.
352+
#[inline]
333353
fn skewed(e: i32) -> i32 {
334354
(e * 631_305 - 261_663) >> 21
335355
}
336356

337357
/// Removes trailing zeros from `x * 10**p`.
338358
/// If x ends in k zeros, returns `(x/10**k, p+k)`.
339359
/// Assumes that x ends in at most 16 zeros.
360+
#[inline]
340361
#[allow(clippy::unreadable_literal)]
341362
fn trim_zeros(x: u64, p: i32) -> (u64, i32) {
342363
const INV5P8: u64 = 0xc767074b22e90e21; // inverse of 5**8
@@ -397,6 +418,7 @@ const I2A: &[u8] = b"\
397418
/// Formats the decimal representation of u into a.
398419
/// The caller is responsible for ensuring that a is big enough to hold u.
399420
/// If a is too big, leading zeros will be filled in as needed.
421+
#[inline]
400422
fn format_base10(a: &mut [u8], mut u: u64) {
401423
let mut nd = a.len();
402424
while nd >= 8 {
@@ -447,6 +469,7 @@ fn format_base10(a: &mut [u8], mut u: u64) {
447469
/// The caller must pass nd set to the number of digits in d.
448470
/// Returns the number of bytes written to s.
449471
#[must_use]
472+
#[inline]
450473
pub fn fmt_float(s: &mut [u8], d: u64, p: i32, nd: i32) -> usize {
451474
let nd = nd as usize;
452475
// Put digits into s, leaving room for decimal point.
@@ -483,6 +506,7 @@ pub fn fmt_float(s: &mut [u8], d: u64, p: i32, nd: i32) -> usize {
483506

484507
/// Returns the number of decimal digits in d.
485508
#[must_use]
509+
#[inline]
486510
pub fn digits(d: u64) -> i32 {
487511
let nd = log10_pow2(64 - d.leading_zeros() as i32);
488512
nd + i32::from(d >= UINT64_POW10[nd as usize])

0 commit comments

Comments
 (0)