Skip to content

Commit 31c7215

Browse files
authored
Add fast path for parseInt and parseFloat (#4542)
1 parent 7df2cdc commit 31c7215

File tree

6 files changed

+109
-65
lines changed

6 files changed

+109
-65
lines changed

core/engine/src/builtins/intl/number_format/mod.rs

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::borrow::Cow;
2-
31
use boa_gc::{Finalize, Trace};
42
use fixed_decimal::{Decimal, FloatPrecision, SignDisplay};
53
use icu_decimal::{
@@ -30,7 +28,7 @@ use crate::{
3028
NativeFunction,
3129
builtins::{
3230
BuiltInConstructor, BuiltInObject, IntrinsicObject, builder::BuiltInBuilder,
33-
options::get_option, string::is_trimmable_whitespace,
31+
options::get_option,
3432
},
3533
context::intrinsics::{Intrinsics, StandardConstructor, StandardConstructors},
3634
js_string,
@@ -825,13 +823,12 @@ fn to_intl_mathematical_value(value: &JsValue, context: &mut Context) -> JsResul
825823
pub(crate) fn js_string_to_fixed_decimal(string: &JsString) -> Option<Decimal> {
826824
// 1. Let text be ! StringToCodePoints(str).
827825
// 2. Let literal be ParseText(text, StringNumericLiteral).
828-
let Ok(string) = string.to_std_string() else {
826+
let Ok(string) = string.trim().to_std_string() else {
829827
// 3. If literal is a List of errors, return NaN.
830828
return None;
831829
};
832830
// 4. Return StringNumericValue of literal.
833-
let string = string.trim_matches(is_trimmable_whitespace);
834-
match string {
831+
match string.as_str() {
835832
"" => return Some(Decimal::from(0)),
836833
"-Infinity" | "Infinity" | "+Infinity" => return None,
837834
_ => {}
@@ -856,11 +853,10 @@ pub(crate) fn js_string_to_fixed_decimal(string: &JsString) -> Option<Decimal> {
856853
return None;
857854
}
858855
let int = BigInt::from_str_radix(string, base).ok()?;
859-
let int_str = int.to_string();
860856

861-
Cow::Owned(int_str)
857+
int.to_string()
862858
} else {
863-
Cow::Borrowed(string)
859+
string
864860
};
865861

866862
Decimal::try_from_str(&s).ok()

core/engine/src/builtins/number/globals.rs

Lines changed: 74 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
use crate::{
22
Context, JsArgs, JsResult, JsStr, JsString, JsValue,
3-
builtins::{BuiltInBuilder, BuiltInObject, IntrinsicObject, string::is_trimmable_whitespace},
3+
builtins::{BuiltInBuilder, BuiltInObject, IntrinsicObject},
44
context::intrinsics::Intrinsics,
55
object::JsObject,
66
realm::Realm,
77
string::StaticJsStrings,
88
};
99

1010
use boa_macros::js_str;
11-
use cow_utils::CowUtils;
11+
use boa_string::JsStrVariant;
1212

1313
/// Builtin javascript 'isFinite(number)' function.
1414
///
@@ -154,17 +154,23 @@ fn from_js_str_radix(src: JsStr<'_>, radix: u8) -> Option<f64> {
154154
/// [spec]: https://tc39.es/ecma262/#sec-parseint-string-radix
155155
/// [mdn]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseInt
156156
pub(crate) fn parse_int(_: &JsValue, args: &[JsValue], context: &mut Context) -> JsResult<JsValue> {
157-
let (Some(val), radix) = (args.first(), args.get_or_undefined(1)) else {
157+
let (Some(string), radix) = (args.first(), args.get_or_undefined(1)) else {
158158
// Not enough arguments to parseInt.
159159
return Ok(JsValue::nan());
160160
};
161161

162+
// OPTIMIZATION: Skip the string round-trip when the value is already a 32-bit integer and no radix was given.
163+
if let Some(int) = string.as_i32()
164+
&& radix.is_null_or_undefined()
165+
{
166+
return Ok(JsValue::new(int));
167+
}
168+
162169
// 1. Let inputString be ? ToString(string).
163-
let input_string = val.to_string(context)?;
170+
let input_string = string.to_string(context)?;
164171

165172
// 2. Let S be ! TrimString(inputString, start).
166173
let mut s = input_string.trim_start();
167-
// let mut
168174

169175
// 3. Let sign be 1.
170176
// 4. If S is not empty and the first code unit of S is the code unit 0x002D (HYPHEN-MINUS),
@@ -297,40 +303,72 @@ pub(crate) fn parse_float(
297303
args: &[JsValue],
298304
context: &mut Context,
299305
) -> JsResult<JsValue> {
300-
if let Some(val) = args.first() {
301-
// TODO: parse float with optimal utf16 algorithm
302-
let input_string = val.to_string(context)?.to_std_string_escaped();
303-
let s = input_string.trim_start_matches(is_trimmable_whitespace);
304-
let s_prefix = s.chars().take(4).collect::<String>();
305-
let s_prefix_lower = s_prefix.cow_to_ascii_lowercase();
306-
// TODO: write our own lexer to match syntax StrDecimalLiteral
307-
if s.starts_with("Infinity") || s.starts_with("+Infinity") {
308-
Ok(JsValue::new(f64::INFINITY))
309-
} else if s.starts_with("-Infinity") {
310-
Ok(JsValue::new(f64::NEG_INFINITY))
311-
} else if s_prefix_lower.starts_with("inf")
312-
|| s_prefix_lower.starts_with("+inf")
313-
|| s_prefix_lower.starts_with("-inf")
314-
{
315-
// Prevent fast_float from parsing "inf", "+inf" as Infinity and "-inf" as -Infinity
316-
Ok(JsValue::nan())
317-
} else {
318-
Ok(fast_float2::parse_partial::<f64, _>(s).map_or_else(
319-
|_| JsValue::nan(),
320-
|(f, len)| {
321-
if len > 0 {
322-
JsValue::new(f)
323-
} else {
324-
JsValue::nan()
325-
}
326-
},
327-
))
306+
const PLUS_CHAR: u16 = b'+' as u16;
307+
const MINUS_CHAR: u16 = b'-' as u16;
308+
const LOWER_CASE_I_CHAR: u16 = b'i' as u16;
309+
const UPPER_CASE_I_CHAR: u16 = b'I' as u16;
310+
311+
let Some(string) = args.first() else {
312+
return Ok(JsValue::nan());
313+
};
314+
315+
// OPTIMIZATION: We can skip the round-trip when the value is already a number.
316+
if string.is_number() {
317+
// Special case for negative zero: ToString(-0) is "0", so the result must be positive zero.
318+
if string.is_negative_zero() {
319+
return Ok(JsValue::new(0));
328320
}
329-
} else {
330-
// Not enough arguments to parseFloat.
331-
Ok(JsValue::nan())
321+
322+
return Ok(string.clone());
323+
}
324+
325+
// 1. Let inputString be ? ToString(string).
326+
let input_string = string.to_string(context)?;
327+
328+
// 2. Let trimmedString be ! TrimString(inputString, start).
329+
let trimmed_string = input_string.trim_start();
330+
331+
// 3. Let trimmed be StringToCodePoints(trimmedString).
332+
// 4. Let trimmedPrefix be the longest prefix of trimmed that satisfies the syntax of a StrDecimalLiteral, which might be trimmed itself. If there is no such prefix, return NaN.
333+
// 5. Let parsedNumber be ParseText(trimmedPrefix, StrDecimalLiteral).
334+
// 6. Assert: parsedNumber is a Parse Node.
335+
// 7. Return the StringNumericValue of parsedNumber.
336+
let (positive, prefix) = match trimmed_string.get(0) {
337+
Some(PLUS_CHAR) => (true, trimmed_string.get(1..).unwrap_or(JsStr::latin1(&[]))),
338+
Some(MINUS_CHAR) => (false, trimmed_string.get(1..).unwrap_or(JsStr::latin1(&[]))),
339+
_ => (true, trimmed_string),
340+
};
341+
342+
if prefix.starts_with(js_str!("Infinity")) {
343+
if positive {
344+
return Ok(JsValue::positive_infinity());
345+
}
346+
return Ok(JsValue::negative_infinity());
347+
} else if let Some(LOWER_CASE_I_CHAR | UPPER_CASE_I_CHAR) = prefix.get(0) {
348+
return Ok(JsValue::nan());
332349
}
350+
351+
let value = match trimmed_string.variant() {
352+
JsStrVariant::Latin1(s) => fast_float2::parse_partial::<f64, _>(s),
353+
JsStrVariant::Utf16(s) => {
354+
// TODO: Explore adding direct UTF-16 parsing support to fast_float2.
355+
let s = String::from_utf16_lossy(s);
356+
fast_float2::parse_partial::<f64, _>(s.as_bytes())
357+
}
358+
};
359+
360+
Ok(value.map_or_else(
361+
|_| JsValue::nan(),
362+
|(f, len)| {
363+
if len > 0 {
364+
JsValue::new(f)
365+
} else {
366+
JsValue::nan()
367+
}
368+
},
369+
))
333370
}
371+
334372
pub(crate) struct ParseFloat;
335373

336374
impl IntrinsicObject for ParseFloat {

core/engine/src/builtins/string/mod.rs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,26 +57,6 @@ pub(crate) enum Placement {
5757
End,
5858
}
5959

60-
/// Helper function to check if a `char` is trimmable.
61-
pub(crate) const fn is_trimmable_whitespace(c: char) -> bool {
62-
// The rust implementation of `trim` does not regard the same characters whitespace as ecma standard does
63-
//
64-
// Rust uses \p{White_Space} by default, which also includes:
65-
// `\u{0085}' (next line)
66-
// And does not include:
67-
// '\u{FEFF}' (zero width non-breaking space)
68-
// Explicit whitespace: https://tc39.es/ecma262/#sec-white-space
69-
matches!(
70-
c,
71-
'\u{0009}' | '\u{000B}' | '\u{000C}' | '\u{0020}' | '\u{00A0}' | '\u{FEFF}' |
72-
// Unicode Space_Separator category
73-
'\u{1680}' | '\u{2000}'
74-
..='\u{200A}' | '\u{202F}' | '\u{205F}' | '\u{3000}' |
75-
// Line terminators: https://tc39.es/ecma262/#sec-line-terminators
76-
'\u{000A}' | '\u{000D}' | '\u{2028}' | '\u{2029}'
77-
)
78-
}
79-
8060
/// JavaScript `String` implementation.
8161
#[derive(Debug, Clone, Copy)]
8262
pub(crate) struct String;

core/engine/src/value/inner/legacy.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ impl EnumBasedValue {
130130
matches!(self, Self::Float64(_))
131131
}
132132

133+
/// Returns true if a value is negative zero (`-0`).
134+
#[must_use]
135+
#[inline]
136+
pub(crate) const fn is_negative_zero(&self) -> bool {
137+
matches!(self, Self::Float64(value) if value.to_bits() == (-0f64).to_bits())
138+
}
139+
133140
/// Returns true if a value is a 32-bits integer.
134141
#[must_use]
135142
#[inline]

core/engine/src/value/inner/nan_boxed.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ mod bits {
184184
/// The constant true value.
185185
pub(super) const VALUE_TRUE: u64 = MASK_BOOLEAN | 1;
186186

187+
/// The constant `-0` value.
188+
pub(super) const VALUE_NEGATIVE_ZERO: u64 = (-0f64).to_bits();
189+
187190
/// Checks that a value is a valid boolean (either true or false).
188191
#[inline(always)]
189192
pub(super) const fn is_bool(value: u64) -> bool {
@@ -198,6 +201,12 @@ mod bits {
198201
|| (value & MASK_KIND) == (MASK_NAN | TAG_NAN)
199202
}
200203

204+
/// Checks that a value is a negative zero (`-0`).
205+
#[inline(always)]
206+
pub(super) const fn is_negative_zero(value: u64) -> bool {
207+
value == VALUE_NEGATIVE_ZERO
208+
}
209+
201210
/// Checks that a value is a valid integer32.
202211
#[inline(always)]
203212
pub(super) const fn is_integer32(value: u64) -> bool {
@@ -519,6 +528,13 @@ impl NanBoxedValue {
519528
bits::is_float(self.value())
520529
}
521530

531+
/// Returns true if a value is negative zero (`-0.0`).
532+
#[must_use]
533+
#[inline(always)]
534+
pub(crate) fn is_negative_zero(&self) -> bool {
535+
bits::is_negative_zero(self.value())
536+
}
537+
522538
/// Returns true if a value is a 32-bits integer.
523539
#[must_use]
524540
#[inline(always)]

core/engine/src/value/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,13 @@ impl JsValue {
354354
self.0.is_integer32() || self.0.is_float64()
355355
}
356356

357+
/// Returns true if the value is a negative zero (`-0`).
358+
#[inline]
359+
#[must_use]
360+
pub(crate) fn is_negative_zero(&self) -> bool {
361+
self.0.is_negative_zero()
362+
}
363+
357364
/// Returns the number if the value is a number, otherwise `None`.
358365
#[inline]
359366
#[must_use]

0 commit comments

Comments
 (0)