11use codespan_reporting::diagnostic::{Diagnostic, Label};
2- use logos::Logos;
2+ use logos::{Filter, Logos} ;
33
44use crate::files::FileId;
55use crate::source::{BytePos, ByteRange};
@@ -13,6 +13,7 @@ pub fn is_keyword(word: &str) -> bool {
1313}
1414
1515#[derive(Clone, Debug, Logos)]
16+ #[logos(extras = FileId)]
1617pub enum Token<'source> {
1718 #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
1819 #[regex(r"r#[a-zA-Z_][a-zA-Z0-9_]*", |lex| &lex.slice()[2..])]
@@ -105,23 +106,78 @@ pub enum Token<'source> {
105106 #[token(")")]
106107 CloseParen,
107108
109+ #[token(r"/*", block_comment)]
110+ BlockComment(BlockCommentError),
111+
108112 #[error]
109113 #[regex(r"\p{Whitespace}", logos::skip)]
110114 #[regex(r"//(.*)\n", logos::skip)]
111115 Error,
112116}
113117
/// Delimiter that opens a block comment.
const OPEN: &str = "/*";
/// Delimiter that closes a block comment.
const CLOSE: &str = "*/";
/// Byte length of a comment delimiter (`OPEN` and `CLOSE` are both 2 bytes).
const LEN: BytePos = OPEN.len() as BytePos;
121+
122+ fn block_comment<'source>(
123+ lexer: &mut logos::Lexer<'source, Token<'source>>,
124+ ) -> Filter<BlockCommentError> {
125+ let start = lexer.span().start as BytePos;
126+ let first_open_pos = start;
127+ let mut last_close_pos = start;
128+ let mut pos = start;
129+
130+ let mut depth: u32 = 1;
131+ while let Some(c) = lexer.remainder().chars().next() {
132+ if lexer.remainder().starts_with(OPEN) {
133+ pos += LEN;
134+ lexer.bump(OPEN.len());
135+ depth += 1;
136+ } else if lexer.remainder().starts_with(CLOSE) {
137+ pos += LEN;
138+ last_close_pos = pos;
139+ lexer.bump(CLOSE.len());
140+ depth -= 1;
141+ if depth == 0 {
142+ break;
143+ }
144+ } else {
145+ pos += c.len_utf8() as BytePos;
146+ lexer.bump(c.len_utf8());
147+ }
148+ }
149+
150+ let file_id = lexer.extras;
151+ match depth {
152+ 0 => Filter::Skip,
153+ _ => Filter::Emit(BlockCommentError {
154+ depth,
155+ first_open: ByteRange::new(file_id, first_open_pos, first_open_pos + LEN),
156+ last_close: ByteRange::new(file_id, last_close_pos, last_close_pos + LEN),
157+ }),
158+ }
159+ }
160+
/// A token paired with its start and end locations: `(start, token, end)`.
pub type Spanned<Tok, Loc> = (Loc, Tok, Loc);
115162
/// Errors produced while lexing.
#[derive(Clone, Debug)]
pub enum Error {
    /// A `/*` block comment was never fully closed before the end of input.
    UnclosedBlockComment(BlockCommentError),
    /// A character matched no token rule.
    UnexpectedCharacter { range: ByteRange },
}
120168
/// Details of an unterminated block comment, produced by the `block_comment`
/// lexer callback.
#[derive(Clone, Debug)]
pub struct BlockCommentError {
    // Number of `*/` still required to close the comment (open nesting depth).
    depth: u32,
    // Range of the first `/*` that opened the comment.
    first_open: ByteRange,
    // Range of the last `*/` seen; falls back to the opening `/*` when no
    // close delimiter was found at all.
    last_close: ByteRange,
}
175+
121176impl Error {
122177 pub fn range(&self) -> ByteRange {
123178 match self {
124179 Error::UnexpectedCharacter { range } => *range,
180+ Error::UnclosedBlockComment(BlockCommentError { first_open, .. }) => *first_open,
125181 }
126182 }
127183
@@ -130,6 +186,19 @@ impl Error {
130186 Error::UnexpectedCharacter { range } => Diagnostic::error()
131187 .with_message("unexpected character")
132188 .with_labels(vec![Label::primary(range.file_id(), *range)]),
189+ Error::UnclosedBlockComment(BlockCommentError {
190+ depth,
191+ first_open,
192+ last_close,
193+ }) => Diagnostic::error()
194+ .with_message("unclosed block comment")
195+ .with_labels(vec![
196+ Label::primary(first_open.file_id(), *first_open)
197+ .with_message(format!("first `{OPEN}`")),
198+ Label::primary(last_close.file_id(), *last_close)
199+ .with_message(format!("last `{CLOSE}`")),
200+ ])
201+ .with_notes(vec![format!("Help: {depth} more `{CLOSE}` needed",)]),
133202 }
134203 }
135204}
@@ -143,16 +212,19 @@ pub fn tokens(
143212 "`source` must be less than 4GiB in length"
144213 );
145214
146- Token::lexer(source).spanned().map(move |(token, range)| {
147- let start = range.start as BytePos;
148- let end = range.end as BytePos;
149- match token {
150- Token::Error => Err(Error::UnexpectedCharacter {
151- range: ByteRange::new(file_id, start, end),
152- }),
153- token => Ok((start, token, end)),
154- }
155- })
215+ Token::lexer_with_extras(source, file_id)
216+ .spanned()
217+ .map(move |(token, range)| {
218+ let start = range.start as BytePos;
219+ let end = range.end as BytePos;
220+ match token {
221+ Token::BlockComment(err) => Err(Error::UnclosedBlockComment(err)),
222+ Token::Error => Err(Error::UnexpectedCharacter {
223+ range: ByteRange::new(file_id, start, end),
224+ }),
225+ token => Ok((start, token, end)),
226+ }
227+ })
156228}
157229
158230impl<'source> Token<'source> {
@@ -195,6 +267,7 @@ impl<'source> Token<'source> {
195267 Token::CloseBracket => "]",
196268 Token::OpenParen => "(",
197269 Token::CloseParen => ")",
270+ Token::BlockComment(_) => "block comment",
198271 Token::Error => "error",
199272 Token::BangEquals => "!=",
200273 Token::EqualsEquals => "==",
0 commit comments