Skip to content

Commit 52a4c5a

Browse files
committed
Add ParserIterator
1 parent 85e0933 commit 52a4c5a

File tree

10 files changed

+161
-23
lines changed

10 files changed

+161
-23
lines changed

crates/utils/src/parser/base.rs

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ use crate::input::{InputError, MapWithInputExt};
22
use crate::parser::combinator::{
33
Map, MapResult, Optional, Or, RepeatArrayVec, RepeatN, RepeatVec, WithPrefix, WithSuffix,
44
};
5-
use crate::parser::error::WithErrorMsg;
5+
use crate::parser::error::{ParseError, WithErrorMsg};
6+
use crate::parser::iterator::ParserIterator;
67
use crate::parser::simple::{Constant, Eol};
78
use crate::parser::then::{Then2, Unimplemented};
8-
use crate::parser::ParseError;
99

1010
/// [`Result`] type returned by [`Parser::parse`].
1111
pub type ParseResult<'i, T> = Result<(T, &'i [u8]), (ParseError, &'i [u8])>;
@@ -164,7 +164,7 @@ pub trait Parser: Sized {
164164
/// # use utils::parser::{self, Parser};
165165
/// assert_eq!(
166166
/// parser::u32()
167-
/// .with_suffix(",".optional())
167+
/// .with_suffix(",".or(parser::eof()))
168168
/// .repeat_n() // N = 3 is inferred
169169
/// .parse(b"12,34,56"),
170170
/// Ok(([12, 34, 56], &b""[..]))
@@ -319,7 +319,7 @@ pub trait Parser: Sized {
319319
/// assert_eq!(
320320
/// parser::u32()
321321
/// .then(parser::u32().with_prefix("x"))
322-
/// .with_suffix(",".optional())
322+
/// .with_suffix(",".or(parser::eof()))
323323
/// .parse_all("1x2,3x4,1234x5678")
324324
/// .unwrap(),
325325
/// vec![
@@ -360,6 +360,56 @@ pub trait Parser: Sized {
360360
.repeat(Constant(()), 0)
361361
.parse_complete(input)
362362
}
363+
364+
/// Create an iterator which applies this parser repeatedly until the provided input is fully
365+
/// consumed.
366+
///
367+
/// The returned iterator will lazily parse the provided input string, producing a sequence of
368+
/// [`Result`] values. Once the end of input is reached, or an error is returned, the iterator
369+
/// will always return [`None`].
370+
///
371+
/// # Examples
372+
/// ```
373+
/// # use utils::input::InputError;
374+
/// # use utils::parser::{self, Parser};
375+
/// let iterator = parser::u32()
376+
/// .with_suffix(parser::eol())
377+
/// .parse_iterator("12\n34\n56\n78");
378+
/// for item in iterator {
379+
/// println!("{}", item?);
380+
/// }
381+
/// # Ok::<(), InputError>(())
382+
/// ```
383+
///
384+
/// ```
385+
/// # use utils::parser::{self, Parser};
386+
/// let mut iterator = parser::u32()
387+
/// .with_suffix(parser::eol())
388+
/// .parse_iterator("12\n34\nnot a integer");
389+
/// assert_eq!(iterator.next().unwrap().unwrap(), 12);
390+
/// assert_eq!(iterator.next().unwrap().unwrap(), 34);
391+
/// assert!(iterator.next().unwrap().is_err());
392+
/// assert!(iterator.next().is_none());
393+
/// ```
394+
///
395+
/// ```
396+
/// # use utils::input::InputError;
397+
/// # use utils::parser::{self, Parser};
398+
/// let filtered = parser::u32()
399+
/// .with_suffix(parser::eol())
400+
/// .parse_iterator("11\n22\n33\n44\n55")
401+
/// .filter(|r| r.is_err() || r.as_ref().is_ok_and(|v| v % 2 == 0))
402+
/// .collect::<Result<Vec<u32>, InputError>>()?;
403+
/// assert_eq!(filtered, vec![22, 44]);
404+
/// # Ok::<(), InputError>(())
405+
/// ```
406+
fn parse_iterator(self, input: &str) -> ParserIterator<Self> {
407+
ParserIterator {
408+
input,
409+
remaining: input.as_bytes(),
410+
parser: self,
411+
}
412+
}
363413
}
364414

365415
// Workaround to allow using methods which consume a parser in methods which take references.
@@ -382,15 +432,15 @@ impl<'a, P: Parser> Parser for ParserRef<'a, P> {
382432
///
383433
/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
384434
impl Parser for &'static str {
385-
type Output<'i> = Self;
435+
type Output<'i> = ();
386436
type Then<T: Parser> = Then2<Self, T>;
387437

388438
#[inline]
389439
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
390440
// This is faster than using strip_prefix for the common case where the string is a short
391441
// string literal known at compile time.
392442
if input.len() >= self.len() && self.bytes().zip(input).all(|(a, &b)| a == b) {
393-
Ok((self, &input[self.len()..]))
443+
Ok(((), &input[self.len()..]))
394444
} else {
395445
Err((ParseError::ExpectedLiteral(self), input))
396446
}
@@ -405,13 +455,13 @@ impl Parser for &'static str {
405455
///
406456
/// Normally used with [`with_prefix`](Parser::with_prefix)/[`with_suffix`](Parser::with_suffix).
407457
impl Parser for u8 {
408-
type Output<'i> = Self;
458+
type Output<'i> = ();
409459
type Then<T: Parser> = Then2<Self, T>;
410460

411461
#[inline]
412462
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
413463
if input.first() == Some(self) {
414-
Ok((*self, &input[1..]))
464+
Ok(((), &input[1..]))
415465
} else {
416466
Err((ParseError::ExpectedByte(*self), input))
417467
}
@@ -427,6 +477,7 @@ impl<O, F: Fn(&[u8]) -> ParseResult<O>> Parser for F {
427477
type Output<'i> = O;
428478
type Then<T: Parser> = Then2<Self, T>;
429479

480+
#[inline]
430481
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
431482
self(input)
432483
}
crates/utils/src/parser/iterator.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
use crate::input::InputError;
2+
use crate::parser::Parser;
3+
use std::iter::FusedIterator;
4+
5+
/// An iterator that lazily parses the input using the provided parser.
6+
///
7+
/// See [`Parser::parse_iterator`].
8+
#[derive(Copy, Clone)]
9+
#[must_use = "iterators are lazy and do nothing unless consumed"]
10+
pub struct ParserIterator<'a, P> {
11+
pub(super) input: &'a str,
12+
pub(super) remaining: &'a [u8],
13+
pub(super) parser: P,
14+
}
15+
16+
impl<'a, P: Parser> Iterator for ParserIterator<'a, P> {
17+
type Item = Result<P::Output<'a>, InputError>;
18+
19+
#[inline]
20+
fn next(&mut self) -> Option<Self::Item> {
21+
if self.remaining.is_empty() {
22+
return None;
23+
}
24+
25+
match self.parser.parse(self.remaining) {
26+
Ok((v, remaining)) => {
27+
self.remaining = remaining;
28+
Some(Ok(v))
29+
}
30+
Err((err, remaining)) => {
31+
self.remaining = &[]; // Ensure future calls return None
32+
Some(Err(InputError::new(self.input, remaining, err)))
33+
}
34+
}
35+
}
36+
}
37+
38+
impl<'a, P: Parser> FusedIterator for ParserIterator<'a, P> {}

crates/utils/src/parser/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
mod base;
44
mod combinator;
55
mod error;
6+
mod iterator;
67
mod macros;
78
mod number;
89
mod one_of;
@@ -11,8 +12,9 @@ mod then;
1112

1213
pub use base::*;
1314
pub use error::ParseError;
15+
pub use iterator::ParserIterator;
1416
pub use number::{i128, i16, i32, i64, i8, number_range, u128, u16, u32, u64, u8};
1517
pub use one_of::one_of;
16-
pub use simple::{byte, byte_range, constant, eol, noop, take_while, take_while1};
18+
pub use simple::{byte, byte_range, constant, eof, eol, noop, take_while, take_while1};
1719

1820
pub use crate::parser_literal_map as literal_map;

crates/utils/src/parser/simple.rs

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::parser::then::Then2;
1+
use crate::parser::then::{Then2, Unimplemented};
22
use crate::parser::{ParseError, ParseResult, Parser};
33
use std::ops::RangeInclusive;
44

@@ -139,6 +139,50 @@ pub fn noop() -> Constant<()> {
139139
Constant(())
140140
}
141141

142+
#[derive(Copy, Clone)]
143+
pub struct Eof();
144+
impl Parser for Eof {
145+
type Output<'i> = ();
146+
type Then<T: Parser> = Unimplemented;
147+
148+
#[inline]
149+
fn parse<'i>(&self, input: &'i [u8]) -> ParseResult<'i, Self::Output<'i>> {
150+
match input {
151+
[] => Ok(((), input)),
152+
_ => Err((ParseError::Expected("end of input"), input)),
153+
}
154+
}
155+
156+
fn then<T: Parser>(self, _next: T) -> Self::Then<T> {
157+
panic!("chaining after eof will never match");
158+
}
159+
}
160+
161+
/// Parser which matches the end of the input.
162+
///
163+
/// Useful when parsing a list where each item is followed by a separator, unless it is at the end of
164+
/// the input.
165+
///
166+
/// # Examples
167+
/// ```
168+
/// # use utils::parser::{self, Parser};
169+
/// assert_eq!(
170+
/// parser::eof().parse(b""),
171+
/// Ok(((), &b""[..]))
172+
/// );
173+
/// assert_eq!(
174+
/// parser::u32()
175+
/// .with_suffix(b','.or(parser::eof()))
176+
/// .repeat_n()
177+
/// .parse(b"12,34,56"),
178+
/// Ok(([12, 34, 56], &b""[..]))
179+
/// );
180+
/// ```
181+
#[must_use]
182+
pub fn eof() -> Eof {
183+
Eof()
184+
}
185+
142186
#[derive(Copy, Clone)]
143187
pub struct Eol();
144188
impl Parser for Eol {

crates/year2016/src/day01.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ impl Day01 {
1919
Ok(Self {
2020
instructions: parser::literal_map!("L" => Turn::L, "R" => Turn::R)
2121
.then(parser::u16())
22-
.with_suffix(", ".optional())
22+
.with_suffix(", ".or(parser::eof()))
2323
.parse_all(input)?,
2424
})
2525
}

crates/year2016/src/day08.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,12 @@ impl Day08 {
3131
.then(parser::u32())
3232
.map(|(x, by)| Instruction::RotateCol { x, by });
3333

34-
let instructions = rect.or(rotate_row).or(rotate_col).parse_lines(input)?;
35-
3634
let mut grid = [[false; 50]; 6];
37-
for &instruction in &instructions {
38-
match instruction {
35+
for item in parser::one_of((rect, rotate_row, rotate_col))
36+
.with_suffix(parser::eol())
37+
.parse_iterator(input)
38+
{
39+
match item? {
3940
Instruction::Rect { width, height } => {
4041
for row in &mut grid[..height as usize] {
4142
row[..width as usize].fill(true);

crates/year2017/src/day06.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub struct Day06 {
1313
impl Day06 {
1414
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
1515
let banks = parser::u32()
16-
.with_suffix(b' '.or(b'\t').optional())
16+
.with_suffix(parser::one_of((b' ', b'\t', parser::eof())))
1717
.parse_all(input)?;
1818

1919
let (mut power, mut lambda) = (1, 1);

crates/year2017/src/day08.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub struct Day08 {
1010

1111
impl Day08 {
1212
pub fn new(input: &str, _: InputType) -> Result<Self, InputError> {
13-
let parsed = parser::take_while1(u8::is_ascii_lowercase)
13+
let parse_iterator = parser::take_while1(u8::is_ascii_lowercase)
1414
.with_suffix(" ")
1515
.then(
1616
parser::one_of((
@@ -32,11 +32,13 @@ impl Day08 {
3232
.with_suffix(" "),
3333
)
3434
.then(parser::i32())
35-
.parse_lines(input)?;
35+
.with_suffix(parser::eol())
36+
.parse_iterator(input);
3637

3738
let mut registers = HashMap::new();
3839
let mut max = 0;
39-
for (reg, value, cond_reg, comparison, cond_value) in parsed {
40+
for item in parse_iterator {
41+
let (reg, value, cond_reg, comparison, cond_value) = item?;
4042
if comparison(registers.entry(cond_reg).or_insert(0), &cond_value) {
4143
let entry = registers.entry(reg).or_insert(0);
4244
*entry += value;

crates/year2017/src/day10.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ impl<'a> Day10<'a> {
1616
#[must_use]
1717
pub fn part1(&self) -> u32 {
1818
let lengths = parser::u8()
19-
.with_suffix(b','.optional())
19+
.with_suffix(b','.or(parser::eof()))
2020
.parse_all(self.input)
2121
.expect("input invalid for part 1");
2222

crates/year2017/src/day11.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@ impl Day11 {
2222
"n" => Point2D::new(0, -1),
2323
"s" => Point2D::new(0, 1),
2424
)
25-
.repeat(b',', 1)
26-
.parse_complete(input)?;
25+
.with_suffix(b','.or(parser::eof()))
26+
.parse_iterator(input);
2727

2828
let mut pos = Point2D::new(0, 0);
2929
let mut max = 0;
3030
for step in steps {
31-
pos += step;
31+
pos += step?;
3232
max = max.max(Self::hex_dist_to_origin(pos));
3333
}
3434

0 commit comments

Comments
 (0)