22
33use ra_parser:: { FragmentKind , ParseError , TreeSink } ;
44use ra_syntax:: {
5- ast, AstToken , NodeOrToken , Parse , SmolStr , SyntaxKind , SyntaxKind :: * , SyntaxNode ,
6- SyntaxTreeBuilder , TextRange , TextUnit , T ,
5+ ast:: { self , make:: tokens:: doc_comment} ,
6+ tokenize, AstToken , NodeOrToken , Parse , SmolStr , SyntaxKind ,
7+ SyntaxKind :: * ,
8+ SyntaxNode , SyntaxTreeBuilder , TextRange , TextUnit , Token , T ,
79} ;
810use rustc_hash:: FxHashMap ;
911use std:: iter:: successors;
@@ -48,9 +50,11 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM
4850/// will consume).
4951pub fn syntax_node_to_token_tree ( node : & SyntaxNode ) -> Option < ( tt:: Subtree , TokenMap ) > {
5052 let global_offset = node. text_range ( ) . start ( ) ;
51- let mut c = Convertor { map : TokenMap :: default ( ) , global_offset, next_id : 0 } ;
53+ let mut c = Convertor {
54+ id_alloc : { TokenIdAlloc { map : TokenMap :: default ( ) , global_offset, next_id : 0 } } ,
55+ } ;
5256 let subtree = c. go ( node) ?;
53- Some ( ( subtree, c. map ) )
57+ Some ( ( subtree, c. id_alloc . map ) )
5458}
5559
5660// The following items are what `rustc` macro can be parsed into :
@@ -89,6 +93,28 @@ pub fn token_tree_to_syntax_node(
8993 Ok ( ( parse, range_map) )
9094}
9195
96+ /// Convert a string to a `TokenTree`
97+ pub fn parse_to_token_tree ( text : & str ) -> Option < ( tt:: Subtree , TokenMap ) > {
98+ let ( tokens, errors) = tokenize ( text) ;
99+ if !errors. is_empty ( ) {
100+ return None ;
101+ }
102+
103+ let mut conv = RawConvertor {
104+ text,
105+ offset : TextUnit :: default ( ) ,
106+ inner : tokens. iter ( ) ,
107+ id_alloc : TokenIdAlloc {
108+ map : Default :: default ( ) ,
109+ global_offset : TextUnit :: default ( ) ,
110+ next_id : 0 ,
111+ } ,
112+ } ;
113+
114+ let subtree = conv. go ( ) ?;
115+ Some ( ( subtree, conv. id_alloc . map ) )
116+ }
117+
92118impl TokenMap {
93119 pub fn token_by_range ( & self , relative_range : TextRange ) -> Option < tt:: TokenId > {
94120 let & ( token_id, _) = self . entries . iter ( ) . find ( |( _, range) | match range {
@@ -118,6 +144,14 @@ impl TokenMap {
118144 self . entries
119145 . push ( ( token_id, TokenTextRange :: Delimiter ( open_relative_range, close_relative_range) ) ) ;
120146 }
147+
148+ fn update_close_delim ( & mut self , token_id : tt:: TokenId , close_relative_range : TextRange ) {
149+ if let Some ( entry) = self . entries . iter_mut ( ) . find ( |( tid, _) | * tid == token_id) {
150+ if let TokenTextRange :: Delimiter ( dim, _) = entry. 1 {
151+ entry. 1 = TokenTextRange :: Delimiter ( dim, close_relative_range) ;
152+ }
153+ }
154+ }
121155}
122156
123157/// Returns the textual content of a doc comment block as a quoted string
@@ -188,12 +222,161 @@ fn convert_doc_comment(token: &ra_syntax::SyntaxToken) -> Option<Vec<tt::TokenTr
188222 }
189223}
190224
191- struct Convertor {
225+ struct TokenIdAlloc {
192226 map : TokenMap ,
193227 global_offset : TextUnit ,
194228 next_id : u32 ,
195229}
196230
231+ impl TokenIdAlloc {
232+ fn alloc ( & mut self , absolute_range : TextRange ) -> tt:: TokenId {
233+ let relative_range = absolute_range - self . global_offset ;
234+ let token_id = tt:: TokenId ( self . next_id ) ;
235+ self . next_id += 1 ;
236+ self . map . insert ( token_id, relative_range) ;
237+ token_id
238+ }
239+
240+ fn delim ( & mut self , open_abs_range : TextRange , close_abs_range : TextRange ) -> tt:: TokenId {
241+ let open_relative_range = open_abs_range - self . global_offset ;
242+ let close_relative_range = close_abs_range - self . global_offset ;
243+ let token_id = tt:: TokenId ( self . next_id ) ;
244+ self . next_id += 1 ;
245+
246+ self . map . insert_delim ( token_id, open_relative_range, close_relative_range) ;
247+ token_id
248+ }
249+
250+ fn open_delim ( & mut self , open_abs_range : TextRange ) -> tt:: TokenId {
251+ let token_id = tt:: TokenId ( self . next_id ) ;
252+ self . next_id += 1 ;
253+ self . map . insert_delim ( token_id, open_abs_range, open_abs_range) ;
254+ token_id
255+ }
256+
257+ fn close_delim ( & mut self , id : tt:: TokenId , close_abs_range : TextRange ) {
258+ self . map . update_close_delim ( id, close_abs_range) ;
259+ }
260+ }
261+
262+ /// A Raw Token (straightly from lexer) convertor
263+ struct RawConvertor < ' a > {
264+ text : & ' a str ,
265+ offset : TextUnit ,
266+ id_alloc : TokenIdAlloc ,
267+ inner : std:: slice:: Iter < ' a , Token > ,
268+ }
269+
270+ impl RawConvertor < ' _ > {
271+ fn go ( & mut self ) -> Option < tt:: Subtree > {
272+ let mut subtree = tt:: Subtree :: default ( ) ;
273+ subtree. delimiter = None ;
274+ while self . peek ( ) . is_some ( ) {
275+ self . collect_leaf ( & mut subtree. token_trees ) ;
276+ }
277+ if subtree. token_trees . is_empty ( ) {
278+ return None ;
279+ }
280+ if subtree. token_trees . len ( ) == 1 {
281+ if let tt:: TokenTree :: Subtree ( first) = & subtree. token_trees [ 0 ] {
282+ return Some ( first. clone ( ) ) ;
283+ }
284+ }
285+ Some ( subtree)
286+ }
287+
288+ fn bump ( & mut self ) -> Option < ( Token , TextRange ) > {
289+ let token = self . inner . next ( ) ?;
290+ let range = TextRange :: offset_len ( self . offset , token. len ) ;
291+ self . offset += token. len ;
292+ Some ( ( * token, range) )
293+ }
294+
295+ fn peek ( & self ) -> Option < Token > {
296+ self . inner . as_slice ( ) . get ( 0 ) . cloned ( )
297+ }
298+
299+ fn collect_leaf ( & mut self , result : & mut Vec < tt:: TokenTree > ) {
300+ let ( token, range) = match self . bump ( ) {
301+ None => return ,
302+ Some ( it) => it,
303+ } ;
304+
305+ let k: SyntaxKind = token. kind ;
306+ if k == COMMENT {
307+ let node = doc_comment ( & self . text [ range] ) ;
308+ if let Some ( tokens) = convert_doc_comment ( & node) {
309+ result. extend ( tokens) ;
310+ }
311+ return ;
312+ }
313+
314+ result. push ( if k. is_punct ( ) {
315+ let delim = match k {
316+ T ! [ '(' ] => Some ( ( tt:: DelimiterKind :: Parenthesis , T ! [ ')' ] ) ) ,
317+ T ! [ '{' ] => Some ( ( tt:: DelimiterKind :: Brace , T ! [ '}' ] ) ) ,
318+ T ! [ '[' ] => Some ( ( tt:: DelimiterKind :: Bracket , T ! [ ']' ] ) ) ,
319+ _ => None ,
320+ } ;
321+
322+ if let Some ( ( kind, closed) ) = delim {
323+ let mut subtree = tt:: Subtree :: default ( ) ;
324+ let id = self . id_alloc . open_delim ( range) ;
325+ subtree. delimiter = Some ( tt:: Delimiter { kind, id } ) ;
326+
327+ while self . peek ( ) . map ( |it| it. kind != closed) . unwrap_or ( false ) {
328+ self . collect_leaf ( & mut subtree. token_trees ) ;
329+ }
330+ let last_range = match self . bump ( ) {
331+ None => return ,
332+ Some ( it) => it. 1 ,
333+ } ;
334+ self . id_alloc . close_delim ( id, last_range) ;
335+ subtree. into ( )
336+ } else {
337+ let spacing = match self . peek ( ) {
338+ Some ( next)
339+ if next. kind . is_trivia ( )
340+ || next. kind == T ! [ '[' ]
341+ || next. kind == T ! [ '{' ]
342+ || next. kind == T ! [ '(' ] =>
343+ {
344+ tt:: Spacing :: Alone
345+ }
346+ Some ( next) if next. kind . is_punct ( ) => tt:: Spacing :: Joint ,
347+ _ => tt:: Spacing :: Alone ,
348+ } ;
349+ let char =
350+ self . text [ range] . chars ( ) . next ( ) . expect ( "Token from lexer must be single char" ) ;
351+
352+ tt:: Leaf :: from ( tt:: Punct { char, spacing, id : self . id_alloc . alloc ( range) } ) . into ( )
353+ }
354+ } else {
355+ macro_rules! make_leaf {
356+ ( $i: ident) => {
357+ tt:: $i { id: self . id_alloc. alloc( range) , text: self . text[ range] . into( ) } . into( )
358+ } ;
359+ }
360+ let leaf: tt:: Leaf = match k {
361+ T ! [ true ] | T ! [ false ] => make_leaf ! ( Literal ) ,
362+ IDENT | LIFETIME => make_leaf ! ( Ident ) ,
363+ k if k. is_keyword ( ) => make_leaf ! ( Ident ) ,
364+ k if k. is_literal ( ) => make_leaf ! ( Literal ) ,
365+ _ => return ,
366+ } ;
367+
368+ leaf. into ( )
369+ } ) ;
370+ }
371+ }
372+
373+ // FIXME: There is some duplicated logic between RawConvertor and Convertor.
374+ // It would be nice to refactor by converting SyntaxNode to ra_parser::Token and thus
375+ // use RawConvertor directly, but performance-wise it may not be a good idea.
376+ struct Convertor {
377+ id_alloc : TokenIdAlloc ,
378+ }
379+
197380impl Convertor {
198381 fn go ( & mut self , tt : & SyntaxNode ) -> Option < tt:: Subtree > {
199382 // This tree is empty
@@ -236,7 +419,7 @@ impl Convertor {
236419 } ;
237420 let delimiter = delimiter_kind. map ( |kind| tt:: Delimiter {
238421 kind,
239- id : self . alloc_delim ( first_child. text_range ( ) , last_child. text_range ( ) ) ,
422+ id : self . id_alloc . delim ( first_child. text_range ( ) , last_child. text_range ( ) ) ,
240423 } ) ;
241424
242425 let mut token_trees = Vec :: new ( ) ;
@@ -273,7 +456,7 @@ impl Convertor {
273456 tt:: Leaf :: from ( tt:: Punct {
274457 char,
275458 spacing,
276- id : self . alloc ( token. text_range ( ) ) ,
459+ id : self . id_alloc . alloc ( token. text_range ( ) ) ,
277460 } )
278461 . into ( ) ,
279462 ) ;
@@ -282,7 +465,7 @@ impl Convertor {
282465 macro_rules! make_leaf {
283466 ( $i: ident) => {
284467 tt:: $i {
285- id: self . alloc( token. text_range( ) ) ,
468+ id: self . id_alloc . alloc( token. text_range( ) ) ,
286469 text: token. text( ) . clone( ) ,
287470 }
288471 . into( )
@@ -313,28 +496,6 @@ impl Convertor {
313496 let res = tt:: Subtree { delimiter, token_trees } ;
314497 Some ( res)
315498 }
316-
317- fn alloc ( & mut self , absolute_range : TextRange ) -> tt:: TokenId {
318- let relative_range = absolute_range - self . global_offset ;
319- let token_id = tt:: TokenId ( self . next_id ) ;
320- self . next_id += 1 ;
321- self . map . insert ( token_id, relative_range) ;
322- token_id
323- }
324-
325- fn alloc_delim (
326- & mut self ,
327- open_abs_range : TextRange ,
328- close_abs_range : TextRange ,
329- ) -> tt:: TokenId {
330- let open_relative_range = open_abs_range - self . global_offset ;
331- let close_relative_range = close_abs_range - self . global_offset ;
332- let token_id = tt:: TokenId ( self . next_id ) ;
333- self . next_id += 1 ;
334-
335- self . map . insert_delim ( token_id, open_relative_range, close_relative_range) ;
336- token_id
337- }
338499}
339500
340501struct TtTreeSink < ' a > {
0 commit comments