Skip to content
This repository was archived by the owner on Apr 2, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions examples/logos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use ariadne::{Color, Label, Report, ReportKind, Source};
use chumsky::{
input::{Stream, ValueInput},
input::{SliceInput, Stream, ValueInput},
prelude::*,
};
use logos::Logos;
Expand All @@ -31,33 +31,38 @@ enum Token<'a> {
#[token(")")]
RParen,

#[regex("[A-Za-z_]+")]
Ident,

#[regex(r"[ \t\f\n]+", logos::skip)]
Whitespace,
Comment on lines 37 to 38
Copy link
Copy Markdown

@Amejonah1200 Amejonah1200 Aug 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can be moved under the #[derive(...)] using #[logos(skip r"[ \t\f\n]+")]

}

impl<'a> fmt::Display for Token<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Float(s) => write!(f, "{}", s),
Self::Float(s) => write!(f, "{s}"),
Self::Add => write!(f, "+"),
Self::Sub => write!(f, "-"),
Self::Mul => write!(f, "*"),
Self::Div => write!(f, "/"),
Self::LParen => write!(f, "("),
Self::RParen => write!(f, ")"),
Self::Whitespace => write!(f, "<whitespace>"),
Self::Ident => write!(f, "<ident>"),
Self::Error => write!(f, "<error>"),
}
}
}

#[derive(Debug)]
enum SExpr {
enum SExpr<'a> {
Float(f64),
Add,
Sub,
Mul,
Div,
Ident(&'a str),
List(Vec<Self>),
}

Expand All @@ -71,9 +76,9 @@ enum SExpr {
// - Has an input type of type `I`, the one we declared as a type parameter
// - Produces an `SExpr` as its output
// - Uses `Rich`, a built-in error type provided by chumsky, for error generation
fn parser<'a, I>() -> impl Parser<'a, I, SExpr, extra::Err<Rich<'a, Token<'a>>>>
fn parser<'a, I>() -> impl Parser<'a, I, SExpr<'a>, extra::Err<Rich<'a, Token<'a>>>>
where
I: ValueInput<'a, Token = Token<'a>, Span = SimpleSpan>,
I: ValueInput<'a, Token = Token<'a>, Span = SimpleSpan> + SliceInput<'a, Slice = &'a str>,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tbf, to avoid humongous types, trait "aliases" should be used.

pub type TokenParserExtra<'a> = Full<Rich<'a, Token>, (), ()>;

pub trait TokenInput<'a>: ValueInput<'a, Token = Token, Span = SimpleSpan> {}
impl<'a, T> TokenInput<'a> for T where T: ValueInput<'a, Token = Token, Span = SimpleSpan> {}

pub trait TokenParser<'a, I: TokenInput<'a>, O>:
    Parser<'a, I, O, TokenParserExtra<'a>> + Clone {
}
impl<'a, I: TokenInput<'a>, O, T> TokenParser<'a, I, O> for T where T: Parser<'a, I, O, TokenParserExtra<'a>> + Clone {}

until real trait aliases are available

{
recursive(|sexpr| {
let atom = select! {
Expand All @@ -84,17 +89,19 @@ where
Token::Div => SExpr::Div,
};

let ident = just(Token::Ident).slice().map(SExpr::Ident);

let list = sexpr
.repeated()
.collect()
.map(SExpr::List)
.delimited_by(just(Token::LParen), just(Token::RParen));

atom.or(list)
atom.or(ident).or(list)
})
}

impl SExpr {
impl<'a> SExpr<'a> {
// Recursively evaluate an s-expression
fn eval(&self) -> Result<f64, &'static str> {
match self {
Expand All @@ -103,6 +110,7 @@ impl SExpr {
Self::Sub => Err("Cannot evaluate operator '-'"),
Self::Mul => Err("Cannot evaluate operator '*'"),
Self::Div => Err("Cannot evaluate operator '/'"),
Self::Ident(_) => Err("Identifiers not supported"),
Self::List(list) => match &list[..] {
[Self::Add, tail @ ..] => tail.iter().map(SExpr::eval).sum(),
[Self::Mul, tail @ ..] => tail.iter().map(SExpr::eval).product(),
Expand Down Expand Up @@ -142,7 +150,8 @@ fn main() {
let token_stream = Stream::from_iter(token_iter)
// Tell chumsky to split the (Token, SimpleSpan) stream into its parts so that it can handle the spans for us
// This involves giving chumsky an 'end of input' span: we just use a zero-width span at the end of the string
.spanned((SRC.len()..SRC.len()).into());
.spanned((SRC.len()..SRC.len()).into())
.with_slice(SRC);

// Parse the token stream with our chumsky parser
match parser().parse(token_stream).into_result() {
Expand Down
115 changes: 115 additions & 0 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,22 @@ pub trait Input<'a>: Sealed + 'a {
phantom: PhantomData,
}
}

/// Make this input implement [`SliceInput`] by using the given slice when performing slicing operations.
///
/// This is useful if you want to have an input that produces 'high-level' tokens be able to refer back to a slice
/// of the raw input that it originated from.
///
/// The returned [`WithSlice`] simply stores `self` alongside `slice`. When the wrapper is asked for a slice, the
/// requested offset range is first turned into a span by `self`, and the start/end of that span are then used to
/// slice `slice`. For the wrapper to actually implement [`SliceInput`], `slice` must therefore use the same span
/// type as this input and use that span's offset type as its own offset type.
///
/// # Examples
///
/// See the `logos` example in the main repository.
fn with_slice<S>(self, slice: S) -> WithSlice<S, Self>
where
Self: Sized,
S: SliceInput<'a>,
{
// Only bundles the two inputs together; all delegation happens in the trait impls for `WithSlice`.
WithSlice { input: self, slice }
}
}

/// Implement by inputs that have a known size (including spans)
Expand Down Expand Up @@ -892,6 +908,105 @@ impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput<R> {
}
}

/// An input wrapper that provides slices by deferring to a second, sliceable input. See [`Input::with_slice`].
///
/// Tokens, offsets and spans all come from the wrapped input `I`; only slicing operations are answered by `S`.
#[derive(Copy, Clone)]
pub struct WithSlice<S, I> {
// The wrapped input: source of tokens, offsets and spans.
input: I,
// The input that slicing requests are forwarded to, addressed by span start/end.
slice: S,
}

// `Input` has `Sealed` as a supertrait, so the wrapper needs this marker impl before it can implement `Input`.
impl<S, I> Sealed for WithSlice<S, I> {}
// Token production is delegated entirely to the wrapped input: every associated type is re-exported from `I`
// and every method forwards to `self.input`. The `slice` field is not consulted here.
impl<'a, S: 'a, I: Input<'a>> Input<'a> for WithSlice<S, I> {
type Offset = I::Offset;
type Token = I::Token;
type Span = I::Span;

#[inline(always)]
fn start(&self) -> Self::Offset {
self.input.start()
}

type TokenMaybe = I::TokenMaybe;

#[inline(always)]
unsafe fn next_maybe(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::TokenMaybe>) {
// SAFETY: `Self::Offset` is `I::Offset`, so whatever the caller guarantees about `offset` for this wrapper
// holds unchanged for the wrapped input.
self.input.next_maybe(offset)
}

#[inline(always)]
unsafe fn span(&self, range: Range<Self::Offset>) -> Self::Span {
// SAFETY: as for `next_maybe`, the caller's guarantees about `range` are passed straight through.
self.input.span(range)
}

#[inline(always)]
fn prev(offs: Self::Offset) -> Self::Offset {
// Associated function (no `self`), so forward through the type rather than the field.
I::prev(offs)
}
}

// End-of-input spans are computed by the slice input `S`, not by the wrapped input: only `S` is required to be
// `ExactSizeInput` here, while `I` is just an `Input`.
impl<'a, S: 'a, I: Input<'a>> ExactSizeInput<'a> for WithSlice<S, I>
where
S: ExactSizeInput<'a, Span = I::Span, Offset = <I::Span as Span>::Offset>,
{
#[inline(always)]
unsafe fn span_from(&self, from: RangeFrom<Self::Offset>) -> Self::Span {
// SAFETY: offset was generated by previous call to `Input::next`
// TODO: Is this sensible?
// `from` is shadowed: it starts as a range of token offsets and becomes a zero-width span located at the
// range's start, which is how a token offset gets translated into the span's offset type.
let from = unsafe { self.input.span(from.start..from.start) };
// `from.start()` is now the span's start; `S::Offset` is the span's offset type (see the `where` clause),
// so it can be handed to the slice input directly. The call is unsafe and relies on the enclosing
// `unsafe fn` contract.
self.slice.span_from(from.start()..)
}
}

// By-value token access is forwarded to the wrapped input; the slice input plays no part.
impl<'a, S: 'a, I: ValueInput<'a>> ValueInput<'a> for WithSlice<S, I> {
#[inline(always)]
unsafe fn next(&self, offset: Self::Offset) -> (Self::Offset, Option<Self::Token>) {
// SAFETY: `Self::Offset` is `I::Offset`, so the caller's guarantees about `offset` carry over unchanged.
self.input.next(offset)
}
}

// By-reference token access is forwarded to the wrapped input; the slice input plays no part.
impl<'a, S: 'a, I: BorrowInput<'a>> BorrowInput<'a> for WithSlice<S, I> {
#[inline(always)]
unsafe fn next_ref(&self, offset: Self::Offset) -> (Self::Offset, Option<&'a Self::Token>) {
// SAFETY: `Self::Offset` is `I::Offset`, so the caller's guarantees about `offset` carry over unchanged.
self.input.next_ref(offset)
}
}

// Slicing is answered by the slice input `S`. Ranges arrive in the wrapped input's offsets, so each request is
// first converted to a span by the wrapped input and the span's start/end are then used to address `S`.
impl<'a, S: 'a, I: Input<'a>> SliceInput<'a> for WithSlice<S, I>
where
S: SliceInput<'a, Span = I::Span, Offset = <I::Span as Span>::Offset>,
{
type Slice = S::Slice;

#[inline(always)]
fn full_slice(&self) -> Self::Slice {
self.slice.full_slice()
}

#[inline(always)]
fn slice(&self, range: Range<Self::Offset>) -> Self::Slice {
// SAFETY: offset was generated by previous call to `Input::next`
// NOTE(review): this is a safe fn, so nothing in the signature enforces the claim above; it relies on
// callers only passing offsets obtained from this input — confirm.
let span = unsafe { self.input.span(range) };
self.slice.slice(span.start()..span.end())
}

#[inline(always)]
fn slice_from(&self, from: RangeFrom<Self::Offset>) -> Self::Slice {
// SAFETY: offset was generated by previous call to `Input::next`
// NOTE(review): as in `slice`, this is a safe fn and the claim above is not enforced by the signature.
// `I` is only bound by `Input` here, so `span_from` is not available on it; instead a zero-width span at
// `from.start` is requested and only its start is used.
// TODO: Is this sensible?
let span = unsafe { self.input.span(from.start..from.start) };
self.slice.slice_from(span.start()..)
}
}

// Lets a `WithSlice` be used as a `StrInput` when the wrapped input is itself a `StrInput` over `C` and the
// slice input hands out `&'a C::Str` slices.
//
// The character type is only required to be a `Char`. An earlier form of this impl used `C: Char<Str = S>`,
// which equated `C::Str` with the slice *input* type `S`, even though the bound on `S` already ties `C::Str` to
// the slice *output* type (`Slice = &'a C::Str`). `S` is stored by value and is therefore sized.
// NOTE(review): chumsky's `Char` impls are believed to use unsized `Str` types (`str`, `[u8]`), which would make
// the old bound unsatisfiable — confirm against `text::Char`.
impl<'a, S: 'a, C, I> StrInput<'a, C> for WithSlice<S, I>
where
    I: StrInput<'a, C>,
    S: SliceInput<'a, Span = I::Span, Offset = <I::Span as Span>::Offset, Slice = &'a C::Str>,
    C: Char,
{
}

/// Represents a location in an input that can be rewound to.
///
/// Markers can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`].
Expand Down