Skip to content

Commit 4124275

Browse files
committed
Remove ParseErrorKind::InvalidPctEncodedOctet
1 parent d64103a commit 4124275

File tree

5 files changed

+62
-52
lines changed

5 files changed

+62
-52
lines changed

src/fmt.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ impl<E: Encoder> Display for EStr<E> {
2121
impl Display for ParseError {
2222
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
2323
let msg = match self.kind {
24-
ParseErrorKind::InvalidPctEncodedOctet => "invalid percent-encoded octet at index ",
25-
ParseErrorKind::UnexpectedChar => "unexpected character at index ",
24+
ParseErrorKind::UnexpectedCharOrEnd => "unexpected character or end of input at index ",
2625
ParseErrorKind::InvalidIpv6Addr => "invalid IPv6 address at index ",
2726
};
2827
write!(f, "{}{}", msg, self.index)

src/parse.rs

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,10 @@ use core::{
1212
/// Detailed cause of a [`ParseError`].
1313
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1414
pub enum ParseErrorKind {
15-
/// Invalid percent-encoded octet that is either non-hexadecimal or incomplete.
15+
/// Unexpected character or end of input.
1616
///
17-
/// The error index points to the percent character "%" of the octet.
18-
InvalidPctEncodedOctet,
19-
/// Unexpected character that is not allowed by the URI/IRI syntax.
20-
///
21-
/// The error index points to the first byte of the character.
22-
UnexpectedChar,
17+
/// The error index points to the first byte of the character or the end of input.
18+
UnexpectedCharOrEnd,
2319
/// Invalid IPv6 address.
2420
///
2521
/// The error index points to the first byte of the address.
@@ -50,14 +46,14 @@ impl ParseError {
5046
#[cfg(feature = "impl-error")]
5147
impl crate::Error for ParseError {}
5248

53-
type Result<T> = core::result::Result<T, crate::parse::ParseError>;
49+
type Result<T> = core::result::Result<T, ParseError>;
5450

5551
/// Returns immediately with an error.
5652
macro_rules! err {
5753
($index:expr, $kind:ident) => {
58-
return Err(crate::parse::ParseError {
54+
return Err(ParseError {
5955
index: $index,
60-
kind: crate::parse::ParseErrorKind::$kind,
56+
kind: ParseErrorKind::$kind,
6157
})
6258
};
6359
}
@@ -163,6 +159,17 @@ impl<'a> Reader<'a> {
163159
Ok(self.pos > start)
164160
}
165161

162+
/// Returns the error for a "%" that is not followed by two hexadecimal digits.
///
/// The reported index starts at the byte after `self.pos`; if that byte exists
/// and is a hexadecimal digit, the index moves one byte further. This function
/// always returns `Err` with kind `UnexpectedCharOrEnd`.
///
/// NOTE(review): the index is computed from `self.pos`, whereas the caller in
/// `read_with` locates the "%" with its own local `i`. This assumes `self.pos`
/// is the index of the "%" at the time of the call — TODO confirm.
#[cold]
163+
fn invalid_pct(&self) -> Result<()> {
164+
let mut i = self.pos + 1;
165+
if let Some(&x) = self.bytes.get(i) {
166+
if pct_enc::is_hexdig(x) {
167+
i += 1;
168+
}
169+
}
170+
err!(i, UnexpectedCharOrEnd);
171+
}
172+
166173
fn read_with(&mut self, table: Table, mut f: impl FnMut(usize, u32)) -> Result<()> {
167174
let mut i = self.pos;
168175

@@ -172,10 +179,10 @@ impl<'a> Reader<'a> {
172179
let x = self.bytes[i];
173180
if $allow_pct_encoded && x == b'%' {
174181
let [hi, lo, ..] = self.bytes[i + 1..] else {
175-
err!(i, InvalidPctEncodedOctet);
182+
return self.invalid_pct();
176183
};
177-
if !pct_enc::is_valid_octet(hi, lo) {
178-
err!(i, InvalidPctEncodedOctet);
184+
if !pct_enc::is_hexdig_pair(hi, lo) {
185+
return self.invalid_pct();
179186
}
180187
i += 3;
181188
} else if $allow_non_ascii {
@@ -395,7 +402,7 @@ impl<'a> Reader<'a> {
395402
};
396403

397404
if !self.read_str("]") {
398-
err!(self.pos, UnexpectedChar);
405+
err!(self.pos, UnexpectedCharOrEnd);
399406
}
400407
Ok(Some(meta))
401408
}
@@ -408,7 +415,7 @@ impl<'a> Reader<'a> {
408415
return Ok(());
409416
}
410417
}
411-
err!(self.pos, UnexpectedChar);
418+
err!(self.pos, UnexpectedCharOrEnd);
412419
}
413420
}
414421

@@ -463,7 +470,7 @@ impl Parser<'_> {
463470
if self.pos > 0 && self.bytes[0].is_ascii_alphabetic() {
464471
self.out.scheme_end = NonZeroUsize::new(self.pos);
465472
} else {
466-
err!(0, UnexpectedChar);
473+
err!(0, UnexpectedCharOrEnd);
467474
}
468475

469476
// INVARIANT: Skipping ":" is fine.
@@ -474,7 +481,7 @@ impl Parser<'_> {
474481
self.parse_from_path(PathKind::General)
475482
};
476483
} else if self.constraints.scheme_required {
477-
err!(self.pos, UnexpectedChar);
484+
err!(self.pos, UnexpectedCharOrEnd);
478485
} else if self.pos == 0 {
479486
// Nothing read.
480487
if self.read_str("//") {
@@ -530,13 +537,13 @@ impl Parser<'_> {
530537
1 => {
531538
for i in colon_idx + 1..self.pos {
532539
if !self.bytes[i].is_ascii_digit() {
533-
err!(i, UnexpectedChar);
540+
err!(i, UnexpectedCharOrEnd);
534541
}
535542
}
536543
colon_idx
537544
}
538545
// Multiple colons.
539-
_ => err!(colon_idx, UnexpectedChar),
546+
_ => err!(colon_idx, UnexpectedCharOrEnd),
540547
};
541548

542549
let meta = parse_v4_or_reg_name(&self.bytes[auth_start..host_end]);
@@ -562,7 +569,7 @@ impl Parser<'_> {
562569
let start = self.pos;
563570
// Either empty or starting with '/'.
564571
if self.read(path_table)? && self.bytes[start] != b'/' {
565-
err!(start, UnexpectedChar);
572+
err!(start, UnexpectedCharOrEnd);
566573
}
567574
(start, self.pos)
568575
}
@@ -573,7 +580,7 @@ impl Parser<'_> {
573580
if self.peek(0) == Some(b':') {
574581
// In a relative reference, the first path
575582
// segment cannot contain a colon character.
576-
err!(self.pos, UnexpectedChar);
583+
err!(self.pos, UnexpectedCharOrEnd);
577584
}
578585

579586
self.read(path_table)?;
@@ -593,7 +600,7 @@ impl Parser<'_> {
593600
}
594601

595602
if self.has_remaining() {
596-
err!(self.pos, UnexpectedChar);
603+
err!(self.pos, UnexpectedCharOrEnd);
597604
}
598605
Ok(())
599606
}

src/pct_enc/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,11 @@ pub(crate) fn decode_hexdigit(x: u8) -> Option<u8> {
487487
Some(OCTET_TABLE_LO[x as usize]).filter(|&v| v < 128)
488488
}
489489

490-
pub(crate) const fn is_valid_octet(hi: u8, lo: u8) -> bool {
490+
/// Returns `true` if the `OCTET_TABLE_LO` entry for `x` is below 128.
///
/// This is the same condition `decode_hexdigit` uses to accept a byte as a
/// hexadecimal digit.
pub(crate) const fn is_hexdig(x: u8) -> bool {
491+
OCTET_TABLE_LO[x as usize] < 128
492+
}
493+
494+
/// Returns `true` if the `OCTET_TABLE_LO` entries for both `hi` and `lo` are
/// below 128, i.e. both bytes pass the same check as `decode_hexdigit`.
pub(crate) const fn is_hexdig_pair(hi: u8, lo: u8) -> bool {
491495
// `|` binds tighter than `<`, so this is `(a | b) < 128`: the OR of the two
// entries stays below 128 only when neither entry has its top bit set.
OCTET_TABLE_LO[hi as usize] | OCTET_TABLE_LO[lo as usize] < 128
492496
}
493497

src/pct_enc/table.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ impl Table {
154154
}
155155
let (hi, lo) = (s[i + 1], s[i + 2]);
156156

157-
if !pct_enc::is_valid_octet(hi, lo) {
157+
if !pct_enc::is_hexdig_pair(hi, lo) {
158158
return false;
159159
}
160160
i += 3;

tests/parse.rs

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -249,20 +249,20 @@ fn parse_error_uri() {
249249
}
250250

251251
// No scheme
252-
fail("foo", 3, UnexpectedChar);
252+
fail("foo", 3, UnexpectedCharOrEnd);
253253

254254
// Empty scheme
255-
fail(":hello", 0, UnexpectedChar);
255+
fail(":hello", 0, UnexpectedCharOrEnd);
256256

257257
// Scheme starts with non-letter
258-
fail("3ttp://a.com", 0, UnexpectedChar);
258+
fail("3ttp://a.com", 0, UnexpectedCharOrEnd);
259259

260260
// Unexpected char in scheme
261-
fail("exam=ple:foo", 4, UnexpectedChar);
262-
fail("(:", 0, UnexpectedChar);
261+
fail("exam=ple:foo", 4, UnexpectedCharOrEnd);
262+
fail("(:", 0, UnexpectedCharOrEnd);
263263

264264
// Percent-encoded scheme
265-
fail("a%20:foo", 1, UnexpectedChar);
265+
fail("a%20:foo", 1, UnexpectedCharOrEnd);
266266
}
267267

268268
#[track_caller]
@@ -275,64 +275,64 @@ fn fail(input: &str, index: usize, kind: ParseErrorKind) {
275275
#[test]
276276
fn parse_error_uri_ref() {
277277
// Empty scheme
278-
fail(":hello", 0, UnexpectedChar);
278+
fail(":hello", 0, UnexpectedCharOrEnd);
279279

280280
// Scheme starts with non-letter
281-
fail("3ttp://a.com", 0, UnexpectedChar);
281+
fail("3ttp://a.com", 0, UnexpectedCharOrEnd);
282282

283283
// After rewriting the parser, the following two cases are interpreted as
284284
// containing a colon in the first path segment of a relative reference.
285285

286286
// Unexpected char in scheme
287-
fail("exam=ple:foo", 8, UnexpectedChar);
288-
fail("(:", 1, UnexpectedChar);
287+
fail("exam=ple:foo", 8, UnexpectedCharOrEnd);
288+
fail("(:", 1, UnexpectedCharOrEnd);
289289

290290
// Percent-encoded scheme
291-
fail("a%20:foo", 4, UnexpectedChar);
291+
fail("a%20:foo", 4, UnexpectedCharOrEnd);
292292

293293
// Unexpected char in path
294-
fail("foo\\bar", 3, UnexpectedChar);
294+
fail("foo\\bar", 3, UnexpectedCharOrEnd);
295295

296296
// Non-hexadecimal percent-encoded octet
297-
fail("foo%xxd", 3, InvalidPctEncodedOctet);
297+
fail("foo%xxd", 4, UnexpectedCharOrEnd);
298298

299299
// Incomplete percent-encoded octet
300-
fail("text%a", 4, InvalidPctEncodedOctet);
300+
fail("text%a", 6, UnexpectedCharOrEnd);
301301

302302
// A single percent
303-
fail("%", 0, InvalidPctEncodedOctet);
303+
fail("%", 1, UnexpectedCharOrEnd);
304304

305305
// Non-decimal port
306-
fail("http://example.com:80ab", 21, UnexpectedChar);
307-
fail("http://user@example.com:80ab", 26, UnexpectedChar);
306+
fail("http://example.com:80ab", 21, UnexpectedCharOrEnd);
307+
fail("http://user@example.com:80ab", 26, UnexpectedCharOrEnd);
308308

309309
// Multiple colons in authority
310-
fail("http://user:pass:example.com/", 16, UnexpectedChar);
310+
fail("http://user:pass:example.com/", 16, UnexpectedCharOrEnd);
311311

312312
// Unclosed bracket
313-
fail("https://[::1/", 12, UnexpectedChar);
313+
fail("https://[::1/", 12, UnexpectedCharOrEnd);
314314

315315
// Not port after IP literal
316-
fail("https://[::1]wrong", 13, UnexpectedChar);
316+
fail("https://[::1]wrong", 13, UnexpectedCharOrEnd);
317317

318318
// IP literal too short
319319
fail("http://[:]", 8, InvalidIpv6Addr);
320-
fail("http://[]", 8, UnexpectedChar);
320+
fail("http://[]", 8, UnexpectedCharOrEnd);
321321

322322
// Non-hexadecimal version in IPvFuture
323-
fail("http://[vG.addr]", 9, UnexpectedChar);
323+
fail("http://[vG.addr]", 9, UnexpectedCharOrEnd);
324324

325325
// Empty version in IPvFuture
326-
fail("http://[v.addr]", 9, UnexpectedChar);
326+
fail("http://[v.addr]", 9, UnexpectedCharOrEnd);
327327

328328
// Empty address in IPvFuture
329-
fail("ftp://[vF.]", 10, UnexpectedChar);
329+
fail("ftp://[vF.]", 10, UnexpectedCharOrEnd);
330330

331331
// Percent-encoded address in IPvFuture
332-
fail("ftp://[vF.%20]", 10, UnexpectedChar);
332+
fail("ftp://[vF.%20]", 10, UnexpectedCharOrEnd);
333333

334334
// With zone identifier
335-
fail("ftp://[fe80::abcd%eth0]", 17, UnexpectedChar);
335+
fail("ftp://[fe80::abcd%eth0]", 17, UnexpectedCharOrEnd);
336336

337337
// Invalid IPv6 address
338338
fail("example://[44:55::66::77]", 11, InvalidIpv6Addr);

0 commit comments

Comments
 (0)