 use codespan_reporting::diagnostic::{Diagnostic, Label};
-use logos::Logos;
+use logos::{Filter, Logos};
 
 use crate::{
     files::FileId,
@@ -15,6 +15,7 @@ pub fn is_keyword(word: &str) -> bool {
 }
 
 #[derive(Clone, Debug, Logos)]
+#[logos(extras = FileId)]
 pub enum Token<'source> {
     #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
     #[regex(r"r#[a-zA-Z_][a-zA-Z0-9_]*", |lex| &lex.slice()[2..])]
@@ -107,23 +108,78 @@ pub enum Token<'source> {
     #[token(")")]
     CloseParen,
 
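+    /// The start of an unbalanced block comment. This variant is only
+    /// produced when `block_comment` runs out of input before finding the
+    /// matching `*/`; balanced comments are skipped entirely.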
+    #[token(r"/*", block_comment)]
+    BlockComment(BlockCommentError),
+
     #[error]
     #[regex(r"\p{Whitespace}", logos::skip)]
     #[regex(r"//(.*)\n", logos::skip)]
     Error,
 }
 
+const OPEN: &str = "/*";
+const CLOSE: &str = "*/";
+const LEN: BytePos = OPEN.len() as BytePos;
+
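+/// Lexer callback for `/*`. Scans the remainder of the source for the
+/// matching `*/`, tracking nesting depth as it goes. Balanced comments are
+/// skipped; an unbalanced comment emits the data needed for a diagnostic.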
+fn block_comment<'source>(
+    lexer: &mut logos::Lexer<'source, Token<'source>>,
+) -> Filter<BlockCommentError> {
+    let start = lexer.span().start as BytePos;
+    let first_open_pos = start;
+    let mut last_close_pos = start;
+    let mut pos = start;
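+    // NOTE: `remainder()` begins *after* the `/*` that logos has already
+    // matched, so `pos` lags the true byte position by `LEN` until the
+    // `pos += LEN` in the delimiter branches below catches it up.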
+
+    let mut depth: u32 = 1;
+    while let Some(c) = lexer.remainder().chars().next() {
+        if lexer.remainder().starts_with(OPEN) {
+            pos += LEN;
+            lexer.bump(OPEN.len());
+            depth += 1;
+        } else if lexer.remainder().starts_with(CLOSE) {
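+            // after this increment, `pos` lands exactly on the start of
+            // this `*/` (see the note on `pos` above)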
+            pos += LEN;
+            last_close_pos = pos;
+            lexer.bump(CLOSE.len());
+            depth -= 1;
+            if depth == 0 {
+                break;
+            }
+        } else {
+            pos += c.len_utf8() as BytePos;
+            lexer.bump(c.len_utf8());
+        }
+    }
+
+    let file_id = lexer.extras;
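+    // A depth of zero means every `/*` was closed and the comment can be
+    // skipped; otherwise emit a token carrying the spans for the diagnostic.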
+    match depth {
+        0 => Filter::Skip,
+        _ => Filter::Emit(BlockCommentError {
+            depth,
+            first_open: ByteRange::new(file_id, first_open_pos, first_open_pos + LEN),
+            last_close: ByteRange::new(file_id, last_close_pos, last_close_pos + LEN),
+        }),
+    }
+}
+
 pub type Spanned<Tok, Loc> = (Loc, Tok, Loc);
 
 #[derive(Clone, Debug)]
 pub enum Error {
+    UnclosedBlockComment(BlockCommentError),
     UnexpectedCharacter { range: ByteRange },
 }
 
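+/// The data needed to report an unclosed block comment: how many `*/` are
+/// still missing, and where the first `/*` and last `*/` were seen.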
+#[derive(Clone, Debug)]
+pub struct BlockCommentError {
+    depth: u32,
+    first_open: ByteRange,
+    last_close: ByteRange,
+}
+
 impl Error {
     pub fn range(&self) -> ByteRange {
         match self {
             Error::UnexpectedCharacter { range } => *range,
+            Error::UnclosedBlockComment(BlockCommentError { first_open, .. }) => *first_open,
         }
     }
 
@@ -132,6 +188,19 @@ impl Error {
             Error::UnexpectedCharacter { range } => Diagnostic::error()
                 .with_message("unexpected character")
                 .with_labels(vec![Label::primary(range.file_id(), *range)]),
+            Error::UnclosedBlockComment(BlockCommentError {
+                depth,
+                first_open,
+                last_close,
+            }) => Diagnostic::error()
+                .with_message("unclosed block comment")
+                .with_labels(vec![
+                    Label::primary(first_open.file_id(), *first_open)
+                        .with_message(format!("first `{OPEN}`")),
+                    Label::primary(last_close.file_id(), *last_close)
+                        .with_message(format!("last `{CLOSE}`")),
+                ])
+                .with_notes(vec![format!("Help: {depth} more `{CLOSE}` needed")]),
         }
     }
 }
@@ -145,16 +214,19 @@ pub fn tokens(
145214 "`source` must be less than 4GiB in length"
146215 ) ;
147216
-    Token::lexer(source).spanned().map(move |(token, range)| {
-        let start = range.start as BytePos;
-        let end = range.end as BytePos;
-        match token {
-            Token::Error => Err(Error::UnexpectedCharacter {
-                range: ByteRange::new(file_id, start, end),
-            }),
-            token => Ok((start, token, end)),
-        }
-    })
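+    // Thread the `FileId` through the lexer as `extras` so that the
+    // `block_comment` callback can build file-aware byte ranges.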
+    Token::lexer_with_extras(source, file_id)
+        .spanned()
+        .map(move |(token, range)| {
+            let start = range.start as BytePos;
+            let end = range.end as BytePos;
+            match token {
+                Token::BlockComment(err) => Err(Error::UnclosedBlockComment(err)),
+                Token::Error => Err(Error::UnexpectedCharacter {
+                    range: ByteRange::new(file_id, start, end),
+                }),
+                token => Ok((start, token, end)),
+            }
+        })
 }
 
 impl<'source> Token<'source> {
@@ -197,6 +269,7 @@ impl<'source> Token<'source> {
             Token::CloseBracket => "]",
             Token::OpenParen => "(",
             Token::CloseParen => ")",
+            Token::BlockComment(_) => "block comment",
             Token::Error => "error",
             Token::BangEquals => "!=",
             Token::EqualsEquals => "==",