Skip to content

Commit e4e1c9f

Browse files
yoctopuce and dpgeorge
authored and committed
py/parsenum: Refactor float parsing code.
This commit extracts from the current float parsing code two functions which could be reused elsewhere in MicroPython. The code used to multiply a float x by a power of 10 is also simplified by applying the binary exponent separately from the power of 5. This avoids the risk of overflow in the intermediate stage, before multiplying by x. Signed-off-by: Yoctopuce dev <[email protected]>
1 parent ffa98cb commit e4e1c9f

File tree

2 files changed

+117
-99
lines changed

2 files changed

+117
-99
lines changed

py/parsenum.c

Lines changed: 112 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
195195
}
196196
}
197197

198+
#if MICROPY_PY_BUILTINS_FLOAT
199+
198200
enum {
199201
REAL_IMAG_STATE_START = 0,
200202
REAL_IMAG_STATE_HAVE_REAL = 1,
@@ -207,25 +209,39 @@ typedef enum {
207209
PARSE_DEC_IN_EXP,
208210
} parse_dec_in_t;
209211

210-
#if MICROPY_PY_BUILTINS_FLOAT
211212
// MANTISSA_MAX is used to retain precision while not overflowing mantissa
212-
// SMALL_NORMAL_VAL is the smallest power of 10 that is still a normal float
213-
// EXACT_POWER_OF_10 is the largest value of x so that 10^x can be stored exactly in a float
214-
// Note: EXACT_POWER_OF_10 is at least floor(log_5(2^mantissa_length)). Indeed, 10^n = 2^n * 5^n
215-
// so we only have to store the 5^n part in the mantissa (the 2^n part will go into the float's
216-
// exponent).
213+
#define MANTISSA_MAX (sizeof(mp_float_uint_t) == 8 ? 0x1999999999999998ULL : 0x19999998U)
214+
215+
// MAX_EXACT_POWER_OF_5 is the largest value of x so that 5^x can be stored exactly in a float
217216
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
218-
#define MANTISSA_MAX 0x19999998U
219-
#define SMALL_NORMAL_VAL (1e-37F)
220-
#define SMALL_NORMAL_EXP (-37)
221-
#define EXACT_POWER_OF_10 (9)
217+
#define MAX_EXACT_POWER_OF_5 (10)
222218
#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
223-
#define MANTISSA_MAX 0x1999999999999998ULL
224-
#define SMALL_NORMAL_VAL (1e-307)
225-
#define SMALL_NORMAL_EXP (-307)
226-
#define EXACT_POWER_OF_10 (22)
219+
#define MAX_EXACT_POWER_OF_5 (22)
227220
#endif
228221

222+
// Helper to compute `num * (10.0 ** dec_exp)`.
// The scaling is split as 10**dec_exp == 2**dec_exp * 5**dec_exp: the power of 2
// is applied by adding dec_exp to the `p.exp` member of the float union, and the
// power of 5 is applied by multiplying or dividing by the result of `pow()`.
// Returns `num` unchanged when dec_exp is 0 or when num is 0.0.
223+
mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp) {
224+
225+
// Nothing to scale: return early so the exponent member is not touched.
if (dec_exp == 0 || num == MICROPY_FLOAT_CONST(0.0)) {
226+
return num;
227+
}
228+
// NOTE(review): p.exp is presumably the exponent bitfield of mp_float_union_t;
// confirm against its declaration.
mp_float_union_t res = {num};
229+
// Multiply first by (2.0 ** dec_exp) via the exponent
230+
// - this will ensure that the result of `pow()` is always in mp_float_t range
231+
// when the result is expected to be in mp_float_t range (e.g. during format)
232+
// - we don't need to care about p.exp overflow, as (5.0 ** dec_exp) will anyway
233+
// force the final result toward the proper edge if needed (0.0 or inf)
234+
res.p.exp += dec_exp;
235+
// Use positive exponents when they are more precise than negative ones: for a
// small negative dec_exp (down to -MAX_EXACT_POWER_OF_5), divide by 5 ** -dec_exp
// instead of multiplying by 5 ** dec_exp.
236+
if (dec_exp < 0 && dec_exp >= -MAX_EXACT_POWER_OF_5) {
237+
res.f /= MICROPY_FLOAT_C_FUN(pow)(5, -dec_exp);
238+
} else {
239+
res.f *= MICROPY_FLOAT_C_FUN(pow)(5, dec_exp);
240+
}
241+
return (mp_float_t)res.f;
242+
}
243+
244+
229245
// Break out inner digit accumulation routine to ease trailing zero deferral.
230246
static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) {
231247
// Core routine to ingest an additional digit.
@@ -244,6 +260,85 @@ static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig
244260
return p_mantissa;
245261
}
246262
}
263+
264+
// Helper to parse an unsigned decimal number into a mp_float_t.
// Reads at most `len` characters starting at `str`. Accepted input is decimal
// digits, a single '.' while in the integer part, an 'e'/'E' exponent marker with
// an optional '+' or '-' sign, and '_' characters, which are skipped. No sign is
// accepted in front of the mantissa.
// On success the parsed value is stored in `*res` and the return value points at
// the first character that was not consumed (`str + len` if everything was
// consumed). Returns NULL, without writing `*res`, when the input ends right
// after the exponent marker and its optional sign.
265+
const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res) {
266+
const char *top = str + len;
267+
268+
parse_dec_in_t in = PARSE_DEC_IN_INTG;
269+
bool exp_neg = false;
270+
mp_float_uint_t mantissa = 0;
271+
int exp_val = 0;
272+
int exp_extra = 0;
273+
int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
274+
while (str < top) {
275+
unsigned int dig = *str++;
276+
if ('0' <= dig && dig <= '9') {
277+
dig -= '0';
278+
if (in == PARSE_DEC_IN_EXP) {
279+
// don't overflow exp_val when adding next digit, instead just truncate
280+
// it and the resulting float will still be correct, either inf or 0.0
281+
// (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
282+
if (exp_val < (INT_MAX / 2 - 9) / 10) {
283+
exp_val = 10 * exp_val + dig;
284+
}
285+
} else {
286+
if (dig == 0 || mantissa >= MANTISSA_MAX) {
287+
// Defer treatment of zeros in fractional part. If nothing comes afterwards, ignore them.
288+
// Also, once we reach MANTISSA_MAX, treat every additional digit as a trailing zero.
289+
if (in == PARSE_DEC_IN_INTG) {
290+
++trailing_zeros_intg;
291+
} else {
292+
++trailing_zeros_frac;
293+
}
294+
} else {
295+
// Time to un-defer any trailing zeros. Intg zeros first.
296+
while (trailing_zeros_intg) {
297+
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_INTG);
298+
--trailing_zeros_intg;
299+
}
300+
while (trailing_zeros_frac) {
301+
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_FRAC);
302+
--trailing_zeros_frac;
303+
}
304+
mantissa = accept_digit(mantissa, dig, &exp_extra, in);
305+
}
306+
}
307+
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
308+
in = PARSE_DEC_IN_FRAC;
309+
// OR-ing with 0x20 makes the comparison match both 'e' and 'E'
} else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
310+
in = PARSE_DEC_IN_EXP;
311+
// consume an optional sign directly after the exponent marker
if (str < top) {
312+
if (str[0] == '+') {
313+
str++;
314+
} else if (str[0] == '-') {
315+
str++;
316+
exp_neg = true;
317+
}
318+
}
319+
// the input ended right after the exponent marker/sign: report failure
if (str == top) {
320+
return NULL;
321+
}
322+
} else if (dig == '_') {
323+
continue;
324+
} else {
325+
// unknown character: step back so the returned pointer refers to it
326+
str--;
327+
break;
328+
}
329+
}
330+
331+
// work out the exponent
332+
if (exp_neg) {
333+
exp_val = -exp_val;
334+
}
335+
// Integer-part digits that are still deferred were never fed to the mantissa,
// so each one adds a factor of 10 here. Deferred fractional digits are dropped.
exp_val += exp_extra + trailing_zeros_intg;
336+
337+
// At this point, we just need to multiply the mantissa by its base 10 exponent.
338+
*res = (mp_float_t)mp_decimal_exp(mantissa, exp_val);
339+
340+
return str;
341+
}
247342
#endif // MICROPY_PY_BUILTINS_FLOAT
248343

249344
#if MICROPY_PY_BUILTINS_COMPLEX
@@ -295,91 +390,9 @@ parse_start:;
295390
dec_val = MICROPY_FLOAT_C_FUN(nan)("");
296391
} else {
297392
// string should be a decimal number
298-
parse_dec_in_t in = PARSE_DEC_IN_INTG;
299-
bool exp_neg = false;
300-
mp_float_uint_t mantissa = 0;
301-
int exp_val = 0;
302-
int exp_extra = 0;
303-
int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
304-
while (str < top) {
305-
unsigned int dig = *str++;
306-
if ('0' <= dig && dig <= '9') {
307-
dig -= '0';
308-
if (in == PARSE_DEC_IN_EXP) {
309-
// don't overflow exp_val when adding next digit, instead just truncate
310-
// it and the resulting float will still be correct, either inf or 0.0
311-
// (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
312-
if (exp_val < (INT_MAX / 2 - 9) / 10) {
313-
exp_val = 10 * exp_val + dig;
314-
}
315-
} else {
316-
if (dig == 0 || mantissa >= MANTISSA_MAX) {
317-
// Defer treatment of zeros in fractional part. If nothing comes afterwards, ignore them.
318-
// Also, once we reach MANTISSA_MAX, treat every additional digit as a trailing zero.
319-
if (in == PARSE_DEC_IN_INTG) {
320-
++trailing_zeros_intg;
321-
} else {
322-
++trailing_zeros_frac;
323-
}
324-
} else {
325-
// Time to un-defer any trailing zeros. Intg zeros first.
326-
while (trailing_zeros_intg) {
327-
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_INTG);
328-
--trailing_zeros_intg;
329-
}
330-
while (trailing_zeros_frac) {
331-
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_FRAC);
332-
--trailing_zeros_frac;
333-
}
334-
mantissa = accept_digit(mantissa, dig, &exp_extra, in);
335-
}
336-
}
337-
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
338-
in = PARSE_DEC_IN_FRAC;
339-
} else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
340-
in = PARSE_DEC_IN_EXP;
341-
if (str < top) {
342-
if (str[0] == '+') {
343-
str++;
344-
} else if (str[0] == '-') {
345-
str++;
346-
exp_neg = true;
347-
}
348-
}
349-
if (str == top) {
350-
goto value_error;
351-
}
352-
} else if (dig == '_') {
353-
continue;
354-
} else {
355-
// unknown character
356-
str--;
357-
break;
358-
}
359-
}
360-
361-
// work out the exponent
362-
if (exp_neg) {
363-
exp_val = -exp_val;
364-
}
365-
366-
// apply the exponent, making sure it's not a subnormal value
367-
exp_val += exp_extra + trailing_zeros_intg;
368-
dec_val = (mp_float_t)mantissa;
369-
if (exp_val < SMALL_NORMAL_EXP) {
370-
exp_val -= SMALL_NORMAL_EXP;
371-
dec_val *= SMALL_NORMAL_VAL;
372-
}
373-
374-
// At this point, we need to multiply the mantissa by its base 10 exponent. If possible,
375-
// we would rather manipulate numbers that have an exact representation in IEEE754. It
376-
// turns out small positive powers of 10 do, whereas small negative powers of 10 don't.
377-
// So in that case, we'll yield a division of exact values rather than a multiplication
378-
// of slightly erroneous values.
379-
if (exp_val < 0 && exp_val >= -EXACT_POWER_OF_10) {
380-
dec_val /= MICROPY_FLOAT_C_FUN(pow)(10, -exp_val);
381-
} else {
382-
dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
393+
str = mp_parse_float_internal(str, top - str, &dec_val);
394+
if (!str) {
395+
goto value_error;
383396
}
384397
}
385398

py/parsenum.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434

3535
mp_obj_t mp_parse_num_integer(const char *restrict str, size_t len, int base, mp_lexer_t *lex);
3636

37+
#if MICROPY_PY_BUILTINS_FLOAT
38+
mp_float_t mp_decimal_exp(mp_float_t num, int dec_exp);
39+
const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res);
40+
#endif
41+
3742
#if MICROPY_PY_BUILTINS_COMPLEX
3843
mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex);
3944

0 commit comments

Comments
 (0)