Skip to content

Commit fb98198

Browse files
authored
fix(parser): Don't panic on bad hex characters (#1097)
Fixes #1096
2 parents: 195d65c + 85761c4 · commit: fb98198

File tree

6 files changed: +291 -36 lines changed

crates/toml_parser/src/decoder/scalar.rs

Lines changed: 109 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,10 @@ pub(crate) fn decode_unquoted_scalar<'i>(
9999
let Some(first) = s.as_bytes().first() else {
100100
return decode_invalid(raw, output, error);
101101
};
102+
if !first.is_ascii_digit() && s.contains(" ") {
103+
// Only datetimes can have a space
104+
return decode_invalid(raw, output, error);
105+
}
102106
match first {
103107
// number starts
104108
b'+' | b'-' => {
@@ -114,8 +118,11 @@ pub(crate) fn decode_unquoted_scalar<'i>(
114118
b'.' => {
115119
let kind = ScalarKind::Float;
116120
let stream = raw.as_str();
117-
ensure_float(stream, raw, error);
118-
decode_float_or_integer(stream, raw, kind, output, error)
121+
if ensure_float(stream, raw, error) {
122+
decode_float_or_integer(stream, raw, kind, output, error)
123+
} else {
124+
kind
125+
}
119126
}
120127
b't' | b'T' => {
121128
const SYMBOL: &str = "true";
@@ -145,7 +152,7 @@ pub(crate) fn decode_unquoted_scalar<'i>(
145152
}
146153
}
147154

148-
pub(crate) fn decode_sign_prefix<'i>(
155+
fn decode_sign_prefix<'i>(
149156
raw: Raw<'i>,
150157
value: &'i str,
151158
output: &mut dyn StringBuilder<'i>,
@@ -178,8 +185,11 @@ pub(crate) fn decode_sign_prefix<'i>(
178185
b'.' => {
179186
let kind = ScalarKind::Float;
180187
let stream = raw.as_str();
181-
ensure_float(stream, raw, error);
182-
decode_float_or_integer(stream, raw, kind, output, error)
188+
if ensure_float(stream, raw, error) {
189+
decode_float_or_integer(stream, raw, kind, output, error)
190+
} else {
191+
kind
192+
}
183193
}
184194
b'i' | b'I' => {
185195
const SYMBOL: &str = "inf";
@@ -221,7 +231,7 @@ pub(crate) fn decode_sign_prefix<'i>(
221231
}
222232
}
223233

224-
pub(crate) fn decode_zero_prefix<'i>(
234+
fn decode_zero_prefix<'i>(
225235
value: &'i str,
226236
signed: bool,
227237
raw: Raw<'i>,
@@ -237,6 +247,10 @@ pub(crate) fn decode_zero_prefix<'i>(
237247
let radix = value.as_bytes()[1];
238248
match radix {
239249
b'x' | b'X' => {
250+
if value.contains(" ") {
251+
// Only datetimes can have a space
252+
return decode_invalid(raw, output, error);
253+
}
240254
if signed {
241255
error.report_error(
242256
ParseError::new("integers with a radix cannot be signed")
@@ -258,10 +272,17 @@ pub(crate) fn decode_zero_prefix<'i>(
258272
let radix = IntegerRadix::Hex;
259273
let kind = ScalarKind::Integer(radix);
260274
let stream = &value[2..];
261-
ensure_radixed_value(stream, raw, radix, error);
262-
decode_float_or_integer(stream, raw, kind, output, error)
275+
if ensure_radixed_value(stream, raw, radix, error) {
276+
decode_float_or_integer(stream, raw, kind, output, error)
277+
} else {
278+
kind
279+
}
263280
}
264281
b'o' | b'O' => {
282+
if value.contains(" ") {
283+
// Only datetimes can have a space
284+
return decode_invalid(raw, output, error);
285+
}
265286
if signed {
266287
error.report_error(
267288
ParseError::new("integers with a radix cannot be signed")
@@ -283,10 +304,17 @@ pub(crate) fn decode_zero_prefix<'i>(
283304
let radix = IntegerRadix::Oct;
284305
let kind = ScalarKind::Integer(radix);
285306
let stream = &value[2..];
286-
ensure_radixed_value(stream, raw, radix, error);
287-
decode_float_or_integer(stream, raw, kind, output, error)
307+
if ensure_radixed_value(stream, raw, radix, error) {
308+
decode_float_or_integer(stream, raw, kind, output, error)
309+
} else {
310+
kind
311+
}
288312
}
289313
b'b' | b'B' => {
314+
if value.contains(" ") {
315+
// Only datetimes can have a space
316+
return decode_invalid(raw, output, error);
317+
}
290318
if signed {
291319
error.report_error(
292320
ParseError::new("integers with a radix cannot be signed")
@@ -308,10 +336,17 @@ pub(crate) fn decode_zero_prefix<'i>(
308336
let radix = IntegerRadix::Bin;
309337
let kind = ScalarKind::Integer(radix);
310338
let stream = &value[2..];
311-
ensure_radixed_value(stream, raw, radix, error);
312-
decode_float_or_integer(stream, raw, kind, output, error)
339+
if ensure_radixed_value(stream, raw, radix, error) {
340+
decode_float_or_integer(stream, raw, kind, output, error)
341+
} else {
342+
kind
343+
}
313344
}
314345
b'd' | b'D' => {
346+
if value.contains(" ") {
347+
// Only datetimes can have a space
348+
return decode_invalid(raw, output, error);
349+
}
315350
if signed {
316351
error.report_error(
317352
ParseError::new("integers with a radix cannot be signed")
@@ -329,15 +364,18 @@ pub(crate) fn decode_zero_prefix<'i>(
329364
.with_expected(&[])
330365
.with_unexpected(Span::new_unchecked(0, 2)),
331366
);
332-
ensure_radixed_value(stream, raw, radix, error);
333-
decode_float_or_integer(stream, raw, kind, output, error)
367+
if ensure_radixed_value(stream, raw, radix, error) {
368+
decode_float_or_integer(stream, raw, kind, output, error)
369+
} else {
370+
kind
371+
}
334372
}
335373
_ => decode_datetime_or_float_or_integer(value, raw, output, error),
336374
}
337375
}
338376
}
339377

340-
pub(crate) fn decode_datetime_or_float_or_integer<'i>(
378+
fn decode_datetime_or_float_or_integer<'i>(
341379
value: &'i str,
342380
raw: Raw<'i>,
343381
output: &mut dyn StringBuilder<'i>,
@@ -349,8 +387,11 @@ pub(crate) fn decode_datetime_or_float_or_integer<'i>(
349387
else {
350388
let kind = ScalarKind::Integer(IntegerRadix::Dec);
351389
let stream = raw.as_str();
352-
ensure_no_leading_zero(value, raw, error);
353-
return decode_float_or_integer(stream, raw, kind, output, error);
390+
if ensure_no_leading_zero(value, raw, error) {
391+
return decode_float_or_integer(stream, raw, kind, output, error);
392+
} else {
393+
return kind;
394+
}
354395
};
355396

356397
#[cfg(feature = "unsafe")] // SAFETY: ascii digits ensures UTF-8 boundary
@@ -365,13 +406,19 @@ pub(crate) fn decode_datetime_or_float_or_integer<'i>(
365406
} else if is_float(rest) {
366407
let kind = ScalarKind::Float;
367408
let stream = raw.as_str();
368-
ensure_float(value, raw, error);
369-
decode_float_or_integer(stream, raw, kind, output, error)
409+
if ensure_float(value, raw, error) {
410+
decode_float_or_integer(stream, raw, kind, output, error)
411+
} else {
412+
kind
413+
}
370414
} else if rest.starts_with("_") {
371415
let kind = ScalarKind::Integer(IntegerRadix::Dec);
372416
let stream = raw.as_str();
373-
ensure_no_leading_zero(value, raw, error);
374-
decode_float_or_integer(stream, raw, kind, output, error)
417+
if ensure_no_leading_zero(value, raw, error) {
418+
decode_float_or_integer(stream, raw, kind, output, error)
419+
} else {
420+
kind
421+
}
375422
} else {
376423
decode_invalid(raw, output, error)
377424
}
@@ -390,20 +437,23 @@ pub(crate) fn decode_datetime_or_float_or_integer<'i>(
390437
/// exp = "e" float-exp-part
391438
/// float-exp-part = [ minus / plus ] zero-prefixable-int
392439
/// ```
393-
pub(crate) fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
394-
ensure_dec_uint(&mut value, raw, false, "invalid mantissa", error);
440+
#[must_use]
441+
fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) -> bool {
442+
let mut is_valid = true;
443+
444+
is_valid &= ensure_dec_uint(&mut value, raw, false, "invalid mantissa", error);
395445

396446
if value.starts_with(".") {
397447
let _ = value.next_token();
398-
ensure_dec_uint(&mut value, raw, true, "invalid fraction", error);
448+
is_valid &= ensure_dec_uint(&mut value, raw, true, "invalid fraction", error);
399449
}
400450

401451
if value.starts_with(['e', 'E']) {
402452
let _ = value.next_token();
403453
if value.starts_with(['+', '-']) {
404454
let _ = value.next_token();
405455
}
406-
ensure_dec_uint(&mut value, raw, true, "invalid exponent", error);
456+
is_valid &= ensure_dec_uint(&mut value, raw, true, "invalid exponent", error);
407457
}
408458

409459
if !value.is_empty() {
@@ -415,16 +465,22 @@ pub(crate) fn ensure_float<'i>(mut value: &'i str, raw: Raw<'i>, error: &mut dyn
415465
.with_expected(&[])
416466
.with_unexpected(Span::new_unchecked(start, end)),
417467
);
468+
is_valid = false;
418469
}
470+
471+
is_valid
419472
}
420473

421-
pub(crate) fn ensure_dec_uint<'i>(
474+
#[must_use]
475+
fn ensure_dec_uint<'i>(
422476
value: &mut &'i str,
423477
raw: Raw<'i>,
424478
zero_prefix: bool,
425479
invalid_description: &'static str,
426480
error: &mut dyn ErrorSink,
427-
) {
481+
) -> bool {
482+
let mut is_valid = true;
483+
428484
let start = *value;
429485
let mut digit_count = 0;
430486
while let Some(current) = value.chars().next() {
@@ -447,6 +503,7 @@ pub(crate) fn ensure_dec_uint<'i>(
447503
.with_expected(&[Expected::Description("digits")])
448504
.with_unexpected(Span::new_unchecked(start, end)),
449505
);
506+
is_valid = false;
450507
}
451508
1 => {}
452509
_ if start.starts_with("0") && !zero_prefix => {
@@ -458,12 +515,18 @@ pub(crate) fn ensure_dec_uint<'i>(
458515
.with_expected(&[])
459516
.with_unexpected(Span::new_unchecked(start, end)),
460517
);
518+
is_valid = false;
461519
}
462520
_ => {}
463521
}
522+
523+
is_valid
464524
}
465525

466-
pub(crate) fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) {
526+
#[must_use]
527+
fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &mut dyn ErrorSink) -> bool {
528+
let mut is_valid = true;
529+
467530
if value.starts_with("0") {
468531
let start = value.offset_from(&raw.as_str());
469532
let end = start + 1;
@@ -473,15 +536,21 @@ pub(crate) fn ensure_no_leading_zero<'i>(value: &'i str, raw: Raw<'i>, error: &m
473536
.with_expected(&[])
474537
.with_unexpected(Span::new_unchecked(start, end)),
475538
);
539+
is_valid = false;
476540
}
541+
542+
is_valid
477543
}
478544

479-
pub(crate) fn ensure_radixed_value(
545+
#[must_use]
546+
fn ensure_radixed_value(
480547
value: &str,
481548
raw: Raw<'_>,
482549
radix: IntegerRadix,
483550
error: &mut dyn ErrorSink,
484-
) {
551+
) -> bool {
552+
let mut is_valid = true;
553+
485554
let invalid = ['+', '-'];
486555
let value = if let Some(value) = value.strip_prefix(invalid) {
487556
let pos = raw.as_str().find(invalid).unwrap();
@@ -491,6 +560,7 @@ pub(crate) fn ensure_radixed_value(
491560
.with_expected(&[])
492561
.with_unexpected(Span::new_unchecked(pos, pos + 1)),
493562
);
563+
is_valid = false;
494564
value
495565
} else {
496566
value
@@ -505,11 +575,14 @@ pub(crate) fn ensure_radixed_value(
505575
.with_context(Span::new_unchecked(0, raw.len()))
506576
.with_unexpected(Span::new_unchecked(pos, pos)),
507577
);
578+
is_valid = false;
508579
}
509580
}
581+
582+
is_valid
510583
}
511584

512-
pub(crate) fn decode_float_or_integer<'i>(
585+
fn decode_float_or_integer<'i>(
513586
stream: &'i str,
514587
raw: Raw<'i>,
515588
kind: ScalarKind,
@@ -547,7 +620,7 @@ pub(crate) fn decode_float_or_integer<'i>(
547620
if 0 < part_start {
548621
let first = part.as_bytes().first().copied().unwrap_or(b'0');
549622
if !is_any_digit(first, kind) {
550-
let start = part_start - 1;
623+
let start = part_start - underscore.len();
551624
let end = part_start;
552625
debug_assert_eq!(&raw.as_str()[start..end], underscore);
553626
error.report_error(
@@ -624,7 +697,7 @@ fn is_float(raw: &str) -> bool {
624697
raw.as_bytes().find_slice((b'.', b'e', b'E')).is_some()
625698
}
626699

627-
pub(crate) fn decode_as_is<'i>(
700+
fn decode_as_is<'i>(
628701
raw: Raw<'i>,
629702
kind: ScalarKind,
630703
output: &mut dyn StringBuilder<'i>,
@@ -634,7 +707,7 @@ pub(crate) fn decode_as_is<'i>(
634707
kind
635708
}
636709

637-
pub(crate) fn decode_as<'i>(
710+
fn decode_as<'i>(
638711
raw: Raw<'i>,
639712
symbol: &'i str,
640713
kind: ScalarKind,
@@ -650,7 +723,7 @@ pub(crate) fn decode_as<'i>(
650723
kind
651724
}
652725

653-
pub(crate) fn decode_symbol<'i>(
726+
fn decode_symbol<'i>(
654727
raw: Raw<'i>,
655728
symbol: &'static str,
656729
kind: ScalarKind,
@@ -674,7 +747,7 @@ pub(crate) fn decode_symbol<'i>(
674747
decode_as(raw, symbol, kind, output, error)
675748
}
676749

677-
pub(crate) fn decode_invalid<'i>(
750+
fn decode_invalid<'i>(
678751
raw: Raw<'i>,
679752
output: &mut dyn StringBuilder<'i>,
680753
error: &mut dyn ErrorSink,

0 commit comments

Comments (0)