Commit 378855a

Merge pull request #455 from brendanzab/block-comment-lexer-mode
Use lexer modes to parse block comments
2 parents: 2f753e1 + 32f45d5
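
The core idea is to treat a second `Logos` token type as a lexer mode: when the main lexer sees `/*`, it is `morph`ed into a tiny `BlockComment` lexer that only knows about `/*` and `*/`, and is morphed back once the comment (or the input) ends. Here is a minimal sketch of that pattern, assuming logos 0.12 (where `#[error]` variants, `logos::Skip`, and `Lexer::morph` are available). The identifiers are illustrative, and unlike the actual change below this sketch does not track nesting depth or report unclosed comments:

```rust
use logos::{Lexer, Logos};

#[derive(Clone, Debug, PartialEq, Logos)]
enum Outer {
    #[regex(r"\w+")]
    Word,

    // On `/*`, hand control to the comment mode below. The callback
    // always returns `logos::Skip`, so this variant is never emitted.
    #[token("/*", skip_comment)]
    CommentStart,

    #[error]
    #[regex(r"\s+", logos::skip)]
    Error,
}

// The "mode": a second token type that only recognises `*/`.
// Every other byte falls into the `#[error]` variant and is ignored.
#[derive(Clone, Debug, Logos)]
enum CommentMode {
    #[token("*/")]
    Close,

    #[error]
    Skip,
}

// Morph into comment mode, consume tokens until `*/`, then morph back,
// carrying the advanced position over to the main lexer.
fn skip_comment(lexer: &mut Lexer<Outer>) -> logos::Skip {
    let mut comment_lexer = lexer.to_owned().morph::<CommentMode>();
    while let Some(token) = comment_lexer.next() {
        if let CommentMode::Close = token {
            break;
        }
    }
    *lexer = comment_lexer.morph::<Outer>();
    logos::Skip
}

fn main() {
    let tokens: Vec<Outer> = Outer::lexer("a /* comment */ b").collect();
    assert_eq!(tokens, vec![Outer::Word, Outer::Word]);
}
```

Compared with scanning `lexer.remainder()` by hand, the mode lexer reuses the generated automaton to find the next `/*` or `*/`, and byte positions stay consistent because the same underlying lexer state is carried through both modes.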

File tree: 2 files changed, +57 -53 lines

fathom/src/surface/lexer.rs

Lines changed: 56 additions & 52 deletions
```diff
@@ -106,54 +106,62 @@ pub enum Token<'source> {
     #[token(")")]
     CloseParen,
 
-    #[token(r"/*", block_comment)]
-    BlockComment(BlockCommentError),
-
     #[error]
     #[regex(r"\p{Whitespace}", logos::skip)]
     #[regex(r"//(.*)\n", logos::skip)]
     Error,
+
+    #[token(r"/*", block_comment)]
+    ErrorData(Error),
+}
+
+#[derive(Clone, Debug, Logos)]
+#[logos(extras = FileId)]
+enum BlockComment {
+    #[error]
+    Skip,
+    #[token("/*")]
+    Open,
+    #[token("*/")]
+    Close,
 }
 
-const OPEN: &str = "/*";
-const CLOSE: &str = "*/";
-const LEN: BytePos = OPEN.len() as BytePos;
+fn lexer_range<'source, T>(lexer: &logos::Lexer<'source, T>) -> ByteRange
+where
+    T: logos::Logos<'source, Extras = FileId>,
+{
+    let span = lexer.span();
+    ByteRange::new(lexer.extras, span.start as BytePos, span.end as BytePos)
+}
 
-fn block_comment<'source>(
-    lexer: &mut logos::Lexer<'source, Token<'source>>,
-) -> Filter<BlockCommentError> {
-    let start = lexer.span().start as BytePos;
-    let first_open_pos = start;
-    let mut last_close_pos = start;
-    let mut pos = start;
+fn block_comment<'source>(lexer: &mut logos::Lexer<'source, Token<'source>>) -> Filter<Error> {
+    let mut comment_lexer = lexer.to_owned().morph::<BlockComment>();
+    let first_open = lexer_range(&comment_lexer);
+    let mut last_close = first_open;
 
     let mut depth: u32 = 1;
-    while let Some(c) = lexer.remainder().chars().next() {
-        if lexer.remainder().starts_with(OPEN) {
-            pos += LEN;
-            lexer.bump(OPEN.len());
-            depth += 1;
-        } else if lexer.remainder().starts_with(CLOSE) {
-            pos += LEN;
-            last_close_pos = pos;
-            lexer.bump(CLOSE.len());
-            depth -= 1;
-            if depth == 0 {
-                break;
+    while let Some(token) = comment_lexer.next() {
+        match token {
+            BlockComment::Skip => {}
+            BlockComment::Open => depth += 1,
+            BlockComment::Close => {
+                depth -= 1;
+                last_close = lexer_range(&comment_lexer);
+                if depth == 0 {
+                    break;
+                }
             }
-        } else {
-            pos += c.len_utf8() as BytePos;
-            lexer.bump(c.len_utf8());
         }
     }
 
-    let file_id = lexer.extras;
+    *lexer = comment_lexer.morph::<Token>();
+
     match depth {
         0 => Filter::Skip,
-        _ => Filter::Emit(BlockCommentError {
+        _ => Filter::Emit(Error::UnclosedBlockComment {
             depth,
-            first_open: ByteRange::new(file_id, first_open_pos, first_open_pos + LEN),
-            last_close: ByteRange::new(file_id, last_close_pos, last_close_pos + LEN),
+            first_open,
+            last_close,
         }),
     }
 }
@@ -162,22 +170,21 @@ pub type Spanned<Tok, Loc> = (Loc, Tok, Loc);
 
 #[derive(Clone, Debug)]
 pub enum Error {
-    UnclosedBlockComment(BlockCommentError),
-    UnexpectedCharacter { range: ByteRange },
-}
-
-#[derive(Clone, Debug)]
-pub struct BlockCommentError {
-    depth: u32,
-    first_open: ByteRange,
-    last_close: ByteRange,
+    UnclosedBlockComment {
+        depth: u32,
+        first_open: ByteRange,
+        last_close: ByteRange,
+    },
+    UnexpectedCharacter {
+        range: ByteRange,
+    },
 }
 
 impl Error {
     pub fn range(&self) -> ByteRange {
         match self {
             Error::UnexpectedCharacter { range } => *range,
-            Error::UnclosedBlockComment(BlockCommentError { first_open, .. }) => *first_open,
+            Error::UnclosedBlockComment { first_open, .. } => *first_open,
         }
     }
 
@@ -186,19 +193,17 @@ impl Error {
             Error::UnexpectedCharacter { range } => Diagnostic::error()
                 .with_message("unexpected character")
                 .with_labels(vec![Label::primary(range.file_id(), *range)]),
-            Error::UnclosedBlockComment(BlockCommentError {
+            Error::UnclosedBlockComment {
                 depth,
                 first_open,
                 last_close,
-            }) => Diagnostic::error()
+            } => Diagnostic::error()
                 .with_message("unclosed block comment")
                 .with_labels(vec![
-                    Label::primary(first_open.file_id(), *first_open)
-                        .with_message(format!("first `{OPEN}`")),
-                    Label::primary(last_close.file_id(), *last_close)
-                        .with_message(format!("last `{CLOSE}`")),
+                    Label::primary(first_open.file_id(), *first_open).with_message("first `/*`"),
+                    Label::primary(last_close.file_id(), *last_close).with_message("last `*/`"),
                 ])
-                .with_notes(vec![format!("Help: {depth} more `{CLOSE}` needed",)]),
+                .with_notes(vec![format!("help: {depth} more `*/` needed")]),
         }
     }
 }
@@ -218,7 +223,7 @@ pub fn tokens(
         let start = range.start as BytePos;
        let end = range.end as BytePos;
         match token {
-            Token::BlockComment(err) => Err(Error::UnclosedBlockComment(err)),
+            Token::ErrorData(err) => Err(err),
             Token::Error => Err(Error::UnexpectedCharacter {
                 range: ByteRange::new(file_id, start, end),
             }),
@@ -267,8 +272,7 @@ impl<'source> Token<'source> {
             Token::CloseBracket => "]",
             Token::OpenParen => "(",
             Token::CloseParen => ")",
-            Token::BlockComment(_) => "block comment",
-            Token::Error => "error",
+            Token::Error | Token::ErrorData(_) => "error",
             Token::BangEquals => "!=",
             Token::EqualsEquals => "==",
             Token::GreaterEquals => ">=",
```
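
The switch from `BlockComment(BlockCommentError)` to `ErrorData(Error)` works because the callback's `Filter` return value decides, per match, whether anything is emitted at all: a fully closed comment yields `Filter::Skip`, while an unclosed one yields `Filter::Emit(Error::UnclosedBlockComment { .. })`, which then surfaces as an `ErrorData` token. A self-contained sketch of that callback contract, again assuming logos 0.12 and using illustrative names:

```rust
use logos::{Filter, Lexer, Logos};

#[derive(Clone, Debug, PartialEq, Logos)]
enum Tok {
    // The callback decides per match: `Filter::Emit(n)` produces
    // `Number(n)`, while `Filter::Skip` drops the match entirely.
    #[regex(r"[0-9]+", odd_only)]
    Number(u64),

    #[error]
    #[regex(r"\s+", logos::skip)]
    Error,
}

// Emit only odd numbers; even ones are silently skipped.
fn odd_only(lexer: &mut Lexer<Tok>) -> Filter<u64> {
    let n: u64 = lexer.slice().parse().unwrap();
    if n % 2 == 1 {
        Filter::Emit(n)
    } else {
        Filter::Skip
    }
}

fn main() {
    let tokens: Vec<Tok> = Tok::lexer("1 2 3 4").collect();
    assert_eq!(tokens, vec![Tok::Number(1), Tok::Number(3)]);
}
```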

tests/fail/parse/unclosed-block-comment.snap

Lines changed: 1 addition & 1 deletion
```diff
@@ -9,6 +9,6 @@ error: unclosed block comment
 9 │ */ b
   │ ^^ last `*/`
 
-  = Help: 1 more `*/` needed
+  = help: 1 more `*/` needed
 
 '''
```
