Skip to content

Commit bb11994

Browse files
authored
Merge pull request #433 from Kmeakin/block-comments
Lex block comments
2 parents 841d48b + c6ebe05 commit bb11994

File tree

5 files changed

+114
-11
lines changed

5 files changed

+114
-11
lines changed

fathom/src/surface/lexer.rs

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use codespan_reporting::diagnostic::{Diagnostic, Label};
2-
use logos::Logos;
2+
use logos::{Filter, Logos};
33

44
use crate::files::FileId;
55
use crate::source::{BytePos, ByteRange};
@@ -13,6 +13,7 @@ pub fn is_keyword(word: &str) -> bool {
1313
}
1414

1515
#[derive(Clone, Debug, Logos)]
16+
#[logos(extras = FileId)]
1617
pub enum Token<'source> {
1718
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
1819
#[regex(r"r#[a-zA-Z_][a-zA-Z0-9_]*", |lex| &lex.slice()[2..])]
@@ -105,23 +106,78 @@ pub enum Token<'source> {
105106
#[token(")")]
106107
CloseParen,
107108

109+
#[token(r"/*", block_comment)]
110+
BlockComment(BlockCommentError),
111+
108112
#[error]
109113
#[regex(r"\p{Whitespace}", logos::skip)]
110114
#[regex(r"//(.*)\n", logos::skip)]
111115
Error,
112116
}
113117

118+
const OPEN: &str = "/*";
119+
const CLOSE: &str = "*/";
120+
const LEN: BytePos = OPEN.len() as BytePos;
121+
122+
fn block_comment<'source>(
123+
lexer: &mut logos::Lexer<'source, Token<'source>>,
124+
) -> Filter<BlockCommentError> {
125+
let start = lexer.span().start as BytePos;
126+
let first_open_pos = start;
127+
let mut last_close_pos = start;
128+
let mut pos = start;
129+
130+
let mut depth: u32 = 1;
131+
while let Some(c) = lexer.remainder().chars().next() {
132+
if lexer.remainder().starts_with(OPEN) {
133+
pos += LEN;
134+
lexer.bump(OPEN.len());
135+
depth += 1;
136+
} else if lexer.remainder().starts_with(CLOSE) {
137+
pos += LEN;
138+
last_close_pos = pos;
139+
lexer.bump(CLOSE.len());
140+
depth -= 1;
141+
if depth == 0 {
142+
break;
143+
}
144+
} else {
145+
pos += c.len_utf8() as BytePos;
146+
lexer.bump(c.len_utf8());
147+
}
148+
}
149+
150+
let file_id = lexer.extras;
151+
match depth {
152+
0 => Filter::Skip,
153+
_ => Filter::Emit(BlockCommentError {
154+
depth,
155+
first_open: ByteRange::new(file_id, first_open_pos, first_open_pos + LEN),
156+
last_close: ByteRange::new(file_id, last_close_pos, last_close_pos + LEN),
157+
}),
158+
}
159+
}
160+
114161
/// A token together with its start and end positions: `(start, token, end)`.
pub type Spanned<Tok, Loc> = (Loc, Tok, Loc);
115162

116163
/// Errors that the lexer reports to the caller.
#[derive(Clone, Debug)]
pub enum Error {
    /// A `/*` block comment that was never fully closed before end of input.
    UnclosedBlockComment(BlockCommentError),
    /// A character that does not begin any known token.
    UnexpectedCharacter { range: ByteRange },
}
120168

169+
/// Details of an unclosed block comment, used to build a diagnostic.
#[derive(Clone, Debug)]
pub struct BlockCommentError {
    // Number of `*/` delimiters still needed to close the comment.
    depth: u32,
    // Byte range of the first `/*` delimiter.
    first_open: ByteRange,
    // Byte range of the last `*/` seen; falls back to the position of the
    // opening `/*` when no `*/` was found at all.
    last_close: ByteRange,
}
175+
121176
impl Error {
122177
pub fn range(&self) -> ByteRange {
123178
match self {
124179
Error::UnexpectedCharacter { range } => *range,
180+
Error::UnclosedBlockComment(BlockCommentError { first_open, .. }) => *first_open,
125181
}
126182
}
127183

@@ -130,6 +186,19 @@ impl Error {
130186
Error::UnexpectedCharacter { range } => Diagnostic::error()
131187
.with_message("unexpected character")
132188
.with_labels(vec![Label::primary(range.file_id(), *range)]),
189+
Error::UnclosedBlockComment(BlockCommentError {
190+
depth,
191+
first_open,
192+
last_close,
193+
}) => Diagnostic::error()
194+
.with_message("unclosed block comment")
195+
.with_labels(vec![
196+
Label::primary(first_open.file_id(), *first_open)
197+
.with_message(format!("first `{OPEN}`")),
198+
Label::primary(last_close.file_id(), *last_close)
199+
.with_message(format!("last `{CLOSE}`")),
200+
])
201+
.with_notes(vec![format!("Help: {depth} more `{CLOSE}` needed",)]),
133202
}
134203
}
135204
}
@@ -143,16 +212,19 @@ pub fn tokens(
143212
"`source` must be less than 4GiB in length"
144213
);
145214

146-
Token::lexer(source).spanned().map(move |(token, range)| {
147-
let start = range.start as BytePos;
148-
let end = range.end as BytePos;
149-
match token {
150-
Token::Error => Err(Error::UnexpectedCharacter {
151-
range: ByteRange::new(file_id, start, end),
152-
}),
153-
token => Ok((start, token, end)),
154-
}
155-
})
215+
Token::lexer_with_extras(source, file_id)
216+
.spanned()
217+
.map(move |(token, range)| {
218+
let start = range.start as BytePos;
219+
let end = range.end as BytePos;
220+
match token {
221+
Token::BlockComment(err) => Err(Error::UnclosedBlockComment(err)),
222+
Token::Error => Err(Error::UnexpectedCharacter {
223+
range: ByteRange::new(file_id, start, end),
224+
}),
225+
token => Ok((start, token, end)),
226+
}
227+
})
156228
}
157229

158230
impl<'source> Token<'source> {
@@ -195,6 +267,7 @@ impl<'source> Token<'source> {
195267
Token::CloseBracket => "]",
196268
Token::OpenParen => "(",
197269
Token::CloseParen => ")",
270+
Token::BlockComment(_) => "block comment",
198271
Token::Error => "error",
199272
Token::BangEquals => "!=",
200273
Token::EqualsEquals => "==",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
let x = /* true */ false;
2+
{}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
stdout = '''
2+
let x : Bool = false; () : ()
3+
'''
4+
stderr = ''
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
//~ exit-code = 1
2+
3+
{}
4+
/* a
5+
/* b
6+
/* c
7+
8+
*/ c
9+
*/ b
10+
// a
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
stdout = ''
2+
stderr = '''
3+
error: unclosed block comment
4+
┌─ tests/fail/parse/unclosed-block-comment.fathom:4:1
5+
6+
4 │ /* a
7+
│ ^^ first `/*`
8+
·
9+
9 │ */ b
10+
│ ^^ last `*/`
11+
12+
= Help: 1 more `*/` needed
13+
14+
'''

0 commit comments

Comments
 (0)