Skip to content

Commit f0db6d8

Browse files
relrelb authored and Herschel committed
avm1: Correct parseFloat()
Rewrite the implementation of `string_to_f64()` to match Flash behavior. This affects `parseFloat()` as well as any `Value` to `f64` coercion.
1 parent 0148fde commit f0db6d8

File tree

2 files changed

+184
-123
lines changed

2 files changed

+184
-123
lines changed

core/src/avm1/globals.rs

Lines changed: 5 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -254,76 +254,17 @@ pub fn get_nan<'gc>(
254254
}
255255
}
256256

257-
pub fn parse_float_impl(s: &WStr, allow_multiple_dots: bool) -> f64 {
258-
let mut out_str = String::with_capacity(s.len());
259-
260-
// TODO: Implementing this in a very janky way for now,
261-
// feeding the string to Rust's float parser.
262-
// Flash's parser is much more lenient, so we have to massage
263-
// the string into an acceptable format.
264-
let mut allow_dot = true;
265-
let mut allow_exp = true;
266-
let mut allow_sign = true;
267-
for unit in s.iter() {
268-
let c = match u8::try_from(unit) {
269-
Ok(c) => c,
270-
// Invalid char, `parseFloat` ignores all trailing garbage.
271-
Err(_) => break,
272-
};
273-
274-
match c {
275-
b'0'..=b'9' => {
276-
allow_sign = false;
277-
out_str.push(c.into());
278-
}
279-
b'+' | b'-' if allow_sign => {
280-
// Sign allowed at first char and following e
281-
allow_sign = false;
282-
out_str.push(c.into());
283-
}
284-
b'.' if allow_exp => {
285-
allow_sign = false;
286-
if allow_dot {
287-
allow_dot = false;
288-
out_str.push(c.into());
289-
} else {
290-
// AVM1 allows multiple . except after e
291-
if allow_multiple_dots {
292-
allow_exp = false;
293-
} else {
294-
break;
295-
}
296-
}
297-
}
298-
b'e' | b'E' if allow_exp => {
299-
allow_sign = true;
300-
allow_exp = false;
301-
allow_dot = false;
302-
out_str.push(c.into());
303-
}
304-
305-
// Invalid char, `parseFloat` ignores all trailing garbage.
306-
_ => break,
307-
};
308-
}
309-
310-
out_str.parse::<f64>().unwrap_or(f64::NAN)
311-
}
312-
313257
pub fn parse_float<'gc>(
314258
activation: &mut Activation<'_, 'gc, '_>,
315259
_this: Object<'gc>,
316260
args: &[Value<'gc>],
317261
) -> Result<Value<'gc>, Error<'gc>> {
318-
let s = if let Some(val) = args.get(0) {
319-
val.coerce_to_string(activation)?
262+
if let Some(value) = args.get(0) {
263+
let string = value.coerce_to_string(activation)?;
264+
Ok(crate::avm1::value::parse_float_impl(&string, false).into())
320265
} else {
321-
return Ok(f64::NAN.into());
322-
};
323-
324-
let s = s.trim_start();
325-
326-
Ok(parse_float_impl(s, true).into())
266+
Ok(Value::Undefined)
267+
}
327268
}
328269

329270
pub fn set_interval<'gc>(

core/src/avm1/value.rs

Lines changed: 179 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ impl<'gc> Value<'gc> {
137137
/// * In SWF6 and lower, `undefined` is coerced to `0.0` (like `false`)
138138
/// rather than `NaN` as required by spec.
139139
/// * In SWF5 and lower, hexadecimal is unsupported.
140-
/// * In SWF4 and lower, a string is coerced using the `parseFloat` function
141-
/// and returns `0.0` rather than `NaN` if it cannot be converted to a number.
140+
/// * In SWF4 and lower, `0.0` is returned rather than `NaN` if a string cannot
141+
/// be converted to a number.
142142
fn primitive_as_number(&self, activation: &mut Activation<'_, 'gc, '_>) -> f64 {
143143
match self {
144144
Value::Undefined if activation.swf_version() < 7 => 0.0,
@@ -459,6 +459,32 @@ impl<'gc> Value<'gc> {
459459
}
460460
}
461461

462+
/// Calculate `value * 10^exp` through repeated multiplication or division.
463+
fn decimal_shift(mut value: f64, mut exp: i32) -> f64 {
464+
let mut base: f64 = 10.0;
465+
// The multiply and division branches are intentionally separate to match Flash's behavior.
466+
if exp > 0 {
467+
while exp > 0 {
468+
if (exp & 1) != 0 {
469+
value *= base;
470+
}
471+
exp >>= 1;
472+
base *= base;
473+
}
474+
} else {
475+
// Avoid overflow when `exp == i32::MIN`.
476+
let mut exp = exp.unsigned_abs();
477+
while exp > 0 {
478+
if (exp & 1) != 0 {
479+
value /= base;
480+
}
481+
exp >>= 1;
482+
base *= base;
483+
}
484+
};
485+
value
486+
}
487+
462488
/// Converts an `f64` to a String with (hopefully) the same output as Flash AVM1.
463489
/// 15 digits are displayed (not including leading 0s in a decimal <1).
464490
/// Exponential notation is used for numbers <= 1e-5 and >= 1e15.
@@ -511,31 +537,6 @@ fn f64_to_string(mut n: f64) -> Cow<'static, str> {
511537
const LOG10_2: f64 = 0.301029995663981; // log_10(2) value (less precise than Rust's f64::LOG10_2).
512538
let mut exp = f64::round(f64::from(exp_base2) * LOG10_2) as i32;
513539

514-
// Calculate `value * 10^exp` through repeated multiplication or division.
515-
fn decimal_shift(mut value: f64, mut exp: i32) -> f64 {
516-
let mut base: f64 = 10.0;
517-
// The multiply and division branches are intentionally separate to match Flash's behavior.
518-
if exp > 0 {
519-
while exp > 0 {
520-
if (exp & 1) != 0 {
521-
value *= base;
522-
}
523-
exp >>= 1;
524-
base *= base;
525-
}
526-
} else {
527-
exp = -exp;
528-
while exp > 0 {
529-
if (exp & 1) != 0 {
530-
value /= base;
531-
}
532-
exp >>= 1;
533-
base *= base;
534-
}
535-
};
536-
value
537-
}
538-
539540
// Shift the decimal value so that it's in the range of [0.0, 10.0).
540541
let mut mantissa: f64 = decimal_shift(n, -exp);
541542

@@ -671,51 +672,170 @@ fn f64_to_string(mut n: f64) -> Cow<'static, str> {
671672
}
672673
}
673674

674-
/// Converts a `WStr` to an f64 based on the SWF version.
675-
fn string_to_f64(str: &WStr, swf_version: u8) -> f64 {
676-
if swf_version < 5 {
677-
use crate::avm1::globals::parse_float_impl;
678-
let v = parse_float_impl(str.trim_start(), true);
679-
if v.is_nan() {
680-
return 0.0;
675+
/// Consumes an optional sign character.
676+
/// Returns whether a minus sign was consumed.
677+
fn parse_sign(s: &mut &WStr) -> bool {
678+
if let Some(after_sign) = s.strip_prefix(b'-') {
679+
*s = after_sign;
680+
true
681+
} else if let Some(after_sign) = s.strip_prefix(b'+') {
682+
*s = after_sign;
683+
false
684+
} else {
685+
false
686+
}
687+
}
688+
689+
/// Converts a `WStr` to an `f64`.
690+
///
691+
/// This function might fail for some invalid inputs, by returning `NaN`.
692+
///
693+
/// `strict` typically tells whether to behave like `Number()` or `parseFloat()`:
694+
/// * `strict == true` fails on trailing garbage (like `Number()`).
695+
/// * `strict == false` ignores trailing garbage (like `parseFloat()`).
696+
pub fn parse_float_impl(mut s: &WStr, strict: bool) -> f64 {
697+
fn is_ascii_digit(c: u16) -> bool {
698+
u8::try_from(c).map_or(false, |c| c.is_ascii_digit())
699+
}
700+
701+
// Allow leading whitespace.
702+
s = s.trim_start();
703+
704+
// Parse sign.
705+
let is_negative = parse_sign(&mut s);
706+
let after_sign = s;
707+
708+
// Validate digits before decimal point.
709+
s = s.trim_start_matches(is_ascii_digit);
710+
let mut exp = (after_sign.len() - s.len()) as i32 - 1;
711+
712+
// Validate digits after decimal point.
713+
if let Some(after_dot) = s.strip_prefix(b'.') {
714+
s = after_dot;
715+
s = s.trim_start_matches(is_ascii_digit);
716+
}
717+
718+
// Fail if we got no digits.
719+
// TODO: Compare by reference instead?
720+
if s.len() == after_sign.len() {
721+
return f64::NAN;
722+
}
723+
724+
// Handle exponent.
725+
if let Some(after_e) = s.strip_prefix(b"eE".as_ref()) {
726+
s = after_e;
727+
728+
// Parse exponent sign.
729+
let exponent_is_negative = parse_sign(&mut s);
730+
731+
// Parse exponent itself.
732+
let mut exponent: i32 = 0;
733+
s = s.trim_start_matches(|c| {
734+
match u8::try_from(c)
735+
.ok()
736+
.and_then(|c| char::from(c).to_digit(10))
737+
{
738+
Some(digit) => {
739+
exponent = exponent.wrapping_mul(10);
740+
exponent = exponent.wrapping_add(digit as i32);
741+
true
742+
}
743+
None => false,
744+
}
745+
});
746+
747+
// Apply exponent sign.
748+
if exponent_is_negative {
749+
exponent = exponent.wrapping_neg();
681750
}
682-
return v;
751+
752+
exp = exp.wrapping_add(exponent);
683753
}
684754

685-
if str.is_empty() {
755+
// Fail if we got digits, but we're in strict mode and not at end of string.
756+
if strict && !s.is_empty() {
686757
return f64::NAN;
687758
}
688759

760+
// Finally, calculate the result.
761+
let mut result = 0.0;
762+
for c in after_sign {
763+
if let Some(digit) = u8::try_from(c)
764+
.ok()
765+
.and_then(|c| char::from(c).to_digit(10))
766+
{
767+
result += decimal_shift(digit.into(), exp);
768+
exp = exp.wrapping_sub(1);
769+
} else if c == b'.' as u16 {
770+
// Allow multiple dots.
771+
} else {
772+
break;
773+
}
774+
}
775+
776+
// Apply sign.
777+
if is_negative {
778+
result = -result;
779+
}
780+
781+
// We shouldn't return `NaN` after a successful parsing.
782+
debug_assert!(!result.is_nan());
783+
result
784+
}
785+
786+
/// Guess the radix of a string.
787+
///
788+
/// With an optional leading sign omitted:
789+
/// * Strings that start with `0x` (case insensitive) are considered hexadecimal.
790+
/// * Strings that start with a `0` and consist only of `0..=7` digits are considered octal.
791+
/// * All other strings are considered decimal.
792+
fn guess_radix(s: &WStr) -> u32 {
793+
// Optionally skip sign.
794+
let s = s.strip_prefix(b"+-".as_ref()).unwrap_or(s);
795+
796+
if let Some(s) = s.strip_prefix(b'0') {
797+
if s.starts_with(b"xX".as_ref()) {
798+
// Hexadecimal.
799+
return 16;
800+
}
801+
802+
if s.iter().all(|c| c >= b'0' as u16 && c <= b'7' as u16) {
803+
// Octal.
804+
return 8;
805+
}
806+
}
807+
808+
// Decimal.
809+
10
810+
}
811+
812+
/// Converts a `WStr` to an `f64` based on the SWF version.
813+
fn string_to_f64(mut s: &WStr, swf_version: u8) -> f64 {
689814
if swf_version >= 6 {
690-
if let Some(v) = str.strip_prefix(WStr::from_units(b"0x")) {
691-
// Flash allows the '-' sign here.
692-
return match Wrapping::<i32>::from_wstr_radix(v, 16) {
693-
Ok(n) => f64::from(n.0 as i32),
815+
let radix = guess_radix(s);
816+
817+
// Parse hexadecimal and octal numbers as integers.
818+
if radix != 10 {
819+
if radix == 16 {
820+
// Bug compatibility: Flash fails to skip a hexadecimal prefix with a sign,
821+
// causing such strings to be parsed as `NaN`.
822+
s = &s[2..];
823+
}
824+
825+
return match Wrapping::<i32>::from_wstr_radix(s, radix) {
826+
Ok(result) => result.0.into(),
694827
Err(_) => f64::NAN,
695828
};
696-
} else if str.starts_with(b'0')
697-
|| str.starts_with(WStr::from_units(b"+0"))
698-
|| str.starts_with(WStr::from_units(b"-0"))
699-
{
700-
// Flash allows the '-' sign here.
701-
if let Ok(n) = Wrapping::<i32>::from_wstr_radix(str, 8) {
702-
return f64::from(n.0);
703-
}
704829
}
705830
}
706831

707-
// Rust parses "inf", "+inf" and "infinity" into Infinity, but Flash doesn't.
708-
// Check if the string starts with 'i' (ignoring any leading +/-).
709-
if str
710-
.strip_prefix(&b"+-"[..])
711-
.unwrap_or(str)
712-
.starts_with(&b"iI"[..])
713-
{
714-
f64::NAN
832+
let strict = swf_version >= 5;
833+
let result = parse_float_impl(s, strict);
834+
if !strict && result.is_nan() {
835+
// In non-strict mode, return `0.0` rather than `NaN`.
836+
0.0
715837
} else {
716-
str.trim_start_matches(&b"\t\n\r "[..])
717-
.parse()
718-
.unwrap_or(f64::NAN)
838+
result
719839
}
720840
}
721841

0 commit comments

Comments
 (0)