Skip to content

Commit c6ebe05

Browse files
committed
Lex block comments
1 parent d9d16f3 commit c6ebe05

File tree

5 files changed

+114
-11
lines changed

5 files changed

+114
-11
lines changed

fathom/src/surface/lexer.rs

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use codespan_reporting::diagnostic::{Diagnostic, Label};
2-
use logos::Logos;
2+
use logos::{Filter, Logos};
33

44
use crate::{
55
files::FileId,
@@ -15,6 +15,7 @@ pub fn is_keyword(word: &str) -> bool {
1515
}
1616

1717
#[derive(Clone, Debug, Logos)]
18+
#[logos(extras = FileId)]
1819
pub enum Token<'source> {
1920
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
2021
#[regex(r"r#[a-zA-Z_][a-zA-Z0-9_]*", |lex| &lex.slice()[2..])]
@@ -107,23 +108,78 @@ pub enum Token<'source> {
107108
#[token(")")]
108109
CloseParen,
109110

111+
#[token(r"/*", block_comment)]
112+
BlockComment(BlockCommentError),
113+
110114
#[error]
111115
#[regex(r"\p{Whitespace}", logos::skip)]
112116
#[regex(r"//(.*)\n", logos::skip)]
113117
Error,
114118
}
115119

120+
/// Opening delimiter of a block comment.
const OPEN: &str = "/*";
/// Closing delimiter of a block comment.
const CLOSE: &str = "*/";
/// Byte length of a block-comment delimiter (`OPEN` and `CLOSE` are both 2 bytes).
const LEN: BytePos = OPEN.len() as BytePos;
124+
/// Logos callback for the `/*` token: consumes a (possibly nested) block
/// comment from the remainder of the input.
///
/// Returns `Filter::Skip` when the comment is properly closed (no token is
/// emitted), or `Filter::Emit` with a `BlockCommentError` describing the
/// unclosed comment when end of input is reached first.
fn block_comment<'source>(
    lexer: &mut logos::Lexer<'source, Token<'source>>,
) -> Filter<BlockCommentError> {
    // Logos has already matched the opening `/*`; `span()` covers it, so
    // `start` is the byte offset of that first `/*`.
    let start = lexer.span().start as BytePos;
    let first_open_pos = start;
    // Defaults to the opening `/*` position if no `*/` is ever seen.
    let mut last_close_pos = start;
    // NOTE: `pos` starts at the span *start*, but `remainder()` begins at
    // span start + LEN — so `pos` lags the true source offset by exactly LEN
    // bytes throughout the loop. The CLOSE branch relies on this: right after
    // `pos += LEN`, `pos` equals the byte offset where the matched `*/` begins.
    let mut pos = start;

    // The `/*` that triggered this callback counts as depth 1.
    let mut depth: u32 = 1;
    while let Some(c) = lexer.remainder().chars().next() {
        if lexer.remainder().starts_with(OPEN) {
            // Nested open: each `/*` must be balanced by its own `*/`.
            pos += LEN;
            lexer.bump(OPEN.len());
            depth += 1;
        } else if lexer.remainder().starts_with(CLOSE) {
            pos += LEN; // `pos` now == byte offset of this `*/` (see note above)
            last_close_pos = pos;
            lexer.bump(CLOSE.len());
            depth -= 1;
            if depth == 0 {
                break;
            }
        } else {
            // Ordinary character inside the comment; advance by its UTF-8 width.
            pos += c.len_utf8() as BytePos;
            lexer.bump(c.len_utf8());
        }
    }

    let file_id = lexer.extras;
    match depth {
        // Fully closed: skip the comment, emitting no token.
        0 => Filter::Skip,
        // End of input with `depth` unmatched `/*`s: emit the error payload,
        // which `tokens` converts into `Error::UnclosedBlockComment`.
        _ => Filter::Emit(BlockCommentError {
            depth,
            first_open: ByteRange::new(file_id, first_open_pos, first_open_pos + LEN),
            // NOTE(review): if no `*/` was seen at all, `last_close_pos` still
            // equals `start`, so the "last `*/`" label points at the opening
            // `/*` itself — confirm this is the intended diagnostic.
            last_close: ByteRange::new(file_id, last_close_pos, last_close_pos + LEN),
        }),
    }
}
162+
116163
pub type Spanned<Tok, Loc> = (Loc, Tok, Loc);
117164

118165
/// Errors produced while lexing a source file.
#[derive(Clone, Debug)]
pub enum Error {
    /// A `/*` block comment was opened but not fully closed before end of input.
    UnclosedBlockComment(BlockCommentError),
    /// A character that does not begin any token was encountered.
    UnexpectedCharacter { range: ByteRange },
}
122170

171+
/// Details of an unclosed block comment, used to build its error diagnostic.
#[derive(Clone, Debug)]
pub struct BlockCommentError {
    // Number of `*/` still needed to balance the comment when input ended.
    depth: u32,
    // Range of the first `/*` that opened the comment.
    first_open: ByteRange,
    // Range of the last `*/` seen; falls back to the opening `/*` position
    // when no `*/` was seen at all.
    last_close: ByteRange,
}
177+
123178
impl Error {
124179
pub fn range(&self) -> ByteRange {
125180
match self {
126181
Error::UnexpectedCharacter { range } => *range,
182+
Error::UnclosedBlockComment(BlockCommentError { first_open, .. }) => *first_open,
127183
}
128184
}
129185

@@ -132,6 +188,19 @@ impl Error {
132188
Error::UnexpectedCharacter { range } => Diagnostic::error()
133189
.with_message("unexpected character")
134190
.with_labels(vec![Label::primary(range.file_id(), *range)]),
191+
Error::UnclosedBlockComment(BlockCommentError {
192+
depth,
193+
first_open,
194+
last_close,
195+
}) => Diagnostic::error()
196+
.with_message("unclosed block comment")
197+
.with_labels(vec![
198+
Label::primary(first_open.file_id(), *first_open)
199+
.with_message(format!("first `{OPEN}`")),
200+
Label::primary(last_close.file_id(), *last_close)
201+
.with_message(format!("last `{CLOSE}`")),
202+
])
203+
.with_notes(vec![format!("Help: {depth} more `{CLOSE}` needed",)]),
135204
}
136205
}
137206
}
@@ -145,16 +214,19 @@ pub fn tokens(
145214
"`source` must be less than 4GiB in length"
146215
);
147216

148-
Token::lexer(source).spanned().map(move |(token, range)| {
149-
let start = range.start as BytePos;
150-
let end = range.end as BytePos;
151-
match token {
152-
Token::Error => Err(Error::UnexpectedCharacter {
153-
range: ByteRange::new(file_id, start, end),
154-
}),
155-
token => Ok((start, token, end)),
156-
}
157-
})
217+
Token::lexer_with_extras(source, file_id)
218+
.spanned()
219+
.map(move |(token, range)| {
220+
let start = range.start as BytePos;
221+
let end = range.end as BytePos;
222+
match token {
223+
Token::BlockComment(err) => Err(Error::UnclosedBlockComment(err)),
224+
Token::Error => Err(Error::UnexpectedCharacter {
225+
range: ByteRange::new(file_id, start, end),
226+
}),
227+
token => Ok((start, token, end)),
228+
}
229+
})
158230
}
159231

160232
impl<'source> Token<'source> {
@@ -197,6 +269,7 @@ impl<'source> Token<'source> {
197269
Token::CloseBracket => "]",
198270
Token::OpenParen => "(",
199271
Token::CloseParen => ")",
272+
Token::BlockComment(_) => "block comment",
200273
Token::Error => "error",
201274
Token::BangEquals => "!=",
202275
Token::EqualsEquals => "==",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
let x = /* true */ false;
2+
{}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
stdout = '''
2+
let x : Bool = false; () : ()
3+
'''
4+
stderr = ''
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
//~ exit-code = 1
2+
3+
{}
4+
/* a
5+
/* b
6+
/* c
7+
8+
*/ c
9+
*/ b
10+
// a
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
stdout = ''
2+
stderr = '''
3+
error: unclosed block comment
4+
┌─ tests/fail/parse/unclosed-block-comment.fathom:4:1
5+
6+
4/* a
7+
│ ^^ first `/*`
8+
·
9+
9 │ */ b
10+
^^ last `*/`
11+
12+
= Help: 1 more `*/` needed
13+
14+
'''

0 commit comments

Comments
 (0)