diff --git a/examples/logos.rs b/examples/logos.rs index 72631a00..0bf956ee 100644 --- a/examples/logos.rs +++ b/examples/logos.rs @@ -4,7 +4,7 @@ use ariadne::{Color, Label, Report, ReportKind, Source}; use chumsky::{ - input::{Stream, ValueInput}, + input::{SliceInput, Stream, ValueInput}, prelude::*, }; use logos::Logos; @@ -31,6 +31,9 @@ enum Token<'a> { #[token(")")] RParen, + #[regex("[A-Za-z_]+")] + Ident, + #[regex(r"[ \t\f\n]+", logos::skip)] Whitespace, } @@ -38,7 +41,7 @@ enum Token<'a> { impl<'a> fmt::Display for Token<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::Float(s) => write!(f, "{}", s), + Self::Float(s) => write!(f, "{s}"), Self::Add => write!(f, "+"), Self::Sub => write!(f, "-"), Self::Mul => write!(f, "*"), @@ -46,18 +49,20 @@ impl<'a> fmt::Display for Token<'a> { Self::LParen => write!(f, "("), Self::RParen => write!(f, ")"), Self::Whitespace => write!(f, ""), + Self::Ident => write!(f, ""), Self::Error => write!(f, ""), } } } #[derive(Debug)] -enum SExpr { +enum SExpr<'a> { Float(f64), Add, Sub, Mul, Div, + Ident(&'a str), List(Vec), } @@ -71,9 +76,9 @@ enum SExpr { // - Has an input type of type `I`, the one we declared as a type parameter // - Produces an `SExpr` as its output // - Uses `Rich`, a built-in error type provided by chumsky, for error generation -fn parser<'a, I>() -> impl Parser<'a, I, SExpr, extra::Err>>> +fn parser<'a, I>() -> impl Parser<'a, I, SExpr<'a>, extra::Err>>> where - I: ValueInput<'a, Token = Token<'a>, Span = SimpleSpan>, + I: ValueInput<'a, Token = Token<'a>, Span = SimpleSpan> + SliceInput<'a, Slice = &'a str>, { recursive(|sexpr| { let atom = select! 
{ @@ -84,17 +89,19 @@ where Token::Div => SExpr::Div, }; + let ident = just(Token::Ident).slice().map(SExpr::Ident); + let list = sexpr .repeated() .collect() .map(SExpr::List) .delimited_by(just(Token::LParen), just(Token::RParen)); - atom.or(list) + atom.or(ident).or(list) }) } -impl SExpr { +impl<'a> SExpr<'a> { // Recursively evaluate an s-expression fn eval(&self) -> Result { match self { @@ -103,6 +110,7 @@ impl SExpr { Self::Sub => Err("Cannot evaluate operator '-'"), Self::Mul => Err("Cannot evaluate operator '*'"), Self::Div => Err("Cannot evaluate operator '/'"), + Self::Ident(_) => Err("Identifiers not supported"), Self::List(list) => match &list[..] { [Self::Add, tail @ ..] => tail.iter().map(SExpr::eval).sum(), [Self::Mul, tail @ ..] => tail.iter().map(SExpr::eval).product(), @@ -142,7 +150,8 @@ fn main() { let token_stream = Stream::from_iter(token_iter) // Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us // This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string - .spanned((SRC.len()..SRC.len()).into()); + .spanned((SRC.len()..SRC.len()).into()) + .with_slice(SRC); // Parse the token stream with our chumsky parser match parser().parse(token_stream).into_result() { diff --git a/src/input.rs b/src/input.rs index a41eb31c..a677b25d 100644 --- a/src/input.rs +++ b/src/input.rs @@ -135,6 +135,22 @@ pub trait Input<'a>: Sealed + 'a { phantom: PhantomData, } } + + /// Make this input implement [`SliceInput`] by using the given slice when performing slicing operations. + /// + /// This is useful if you want to have an input that produces 'high-level' tokens be able to refer back to a slice + /// of the raw input that it originated from. + /// + /// # Examples + /// + /// See the `logos` example in the main repository. 
+ fn with_slice(self, slice: S) -> WithSlice where Self: Sized, S: SliceInput<'a>, { WithSlice { input: self, slice } } } /// Implement by inputs that have a known size (including spans) @@ -892,6 +908,105 @@ impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput { } } +/// An input wrapper that provides slices by delegating to the given slice input. See [`Input::with_slice`]. +#[derive(Copy, Clone)] +pub struct WithSlice { + input: I, + slice: S, +} + +impl Sealed for WithSlice {} +impl<'a, S: 'a, I: Input<'a>> Input<'a> for WithSlice { + type Offset = I::Offset; + type Token = I::Token; + type Span = I::Span; + + #[inline(always)] + fn start(&self) -> Self::Offset { + self.input.start() + } + + type TokenMaybe = I::TokenMaybe; + + #[inline(always)] + unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option) { + self.input.next_maybe(offset) + } + + #[inline(always)] + unsafe fn span(&self, range: Range) -> Self::Span { + self.input.span(range) + } + + #[inline(always)] + fn prev(offs: Self::Offset) -> Self::Offset { + I::prev(offs) + } +} + +impl<'a, S: 'a, I: Input<'a>> ExactSizeInput<'a> for WithSlice +where + S: ExactSizeInput<'a, Span = I::Span, Offset = ::Offset>, +{ + #[inline(always)] + unsafe fn span_from(&self, from: RangeFrom) -> Self::Span { + // SAFETY: offset was generated by previous call to `Input::next` + // TODO: Is this sensible? + let from = unsafe { self.input.span(from.start..from.start) }; + self.slice.span_from(from.start()..) 
+ } +} + +impl<'a, S: 'a, I: ValueInput<'a>> ValueInput<'a> for WithSlice { + #[inline(always)] + unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option) { + self.input.next(offset) + } +} + +impl<'a, S: 'a, I: BorrowInput<'a>> BorrowInput<'a> for WithSlice { + #[inline(always)] + unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) { + self.input.next_ref(offset) + } +} + +impl<'a, S: 'a, I: Input<'a>> SliceInput<'a> for WithSlice +where + S: SliceInput<'a, Span = I::Span, Offset = ::Offset>, +{ + type Slice = S::Slice; + + #[inline(always)] + fn full_slice(&self) -> Self::Slice { + self.slice.full_slice() + } + + #[inline(always)] + fn slice(&self, range: Range) -> Self::Slice { + // SAFETY: offset was generated by previous call to `Input::next` + let span = unsafe { self.input.span(range) }; + self.slice.slice(span.start()..span.end()) + } + + #[inline(always)] + fn slice_from(&self, from: RangeFrom) -> Self::Slice { + // SAFETY: offset was generated by previous call to `Input::next` + // let span = unsafe { self.input.span_from(from) }; + // TODO: Is this sensible? + let span = unsafe { self.input.span(from.start..from.start) }; + self.slice.slice_from(span.start()..) + } +} + +impl<'a, S: 'a, C, I> StrInput<'a, C> for WithSlice +where + I: StrInput<'a, C>, + S: SliceInput<'a, Span = I::Span, Offset = ::Offset, Slice = &'a C::Str>, + C: Char, +{ +} + /// Represents a location in an input that can be rewound to. /// /// Markers can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`].