@@ -13,12 +13,15 @@ enum TokenType {
1313 BLOCK_INNER_DOC_MARKER ,
1414 BLOCK_COMMENT_CONTENT ,
1515 LINE_DOC_CONTENT ,
16- FRONTMATTER ,
16+ FRONTMATTER_START ,
17+ FRONTMATTER_CONTENT ,
18+ FRONTMATTER_END ,
1719 ERROR_SENTINEL
1820};
1921
/**
 * Persistent state of the external scanner.
 *
 * Both fields are single bytes so the whole state can be copied into and out
 * of the serialization buffer one `char` per field.
 */
typedef struct {
    /* Presumably the number of '#' that opened the current raw string literal;
     * it is set outside this excerpt -- TODO confirm. */
    uint8_t opening_hash_count;
    /* Number of '-' in the opening frontmatter fence; recorded by
     * process_frontmatter_start and compared against when looking for the
     * closing fence. */
    uint8_t frontmatter_dashes;
} Scanner;
2326
2427void * tree_sitter_rust_external_scanner_create () { return ts_calloc (1 , sizeof (Scanner )); }
@@ -28,15 +31,16 @@ void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner
2831unsigned tree_sitter_rust_external_scanner_serialize (void * payload , char * buffer ) {
2932 Scanner * scanner = (Scanner * )payload ;
3033 buffer [0 ] = (char )scanner -> opening_hash_count ;
31- return 1 ;
34+ buffer [1 ] = (char )scanner -> frontmatter_dashes ;
35+ return 2 ;
3236}
3337
3438void tree_sitter_rust_external_scanner_deserialize (void * payload , const char * buffer , unsigned length ) {
3539 Scanner * scanner = (Scanner * )payload ;
3640 scanner -> opening_hash_count = 0 ;
37- if (length == 1 ) {
38- Scanner * scanner = (Scanner * )payload ;
41+ if (length == 2 ) {
3942 scanner -> opening_hash_count = buffer [0 ];
43+ scanner -> frontmatter_dashes = buffer [1 ];
4044 }
4145}
4246
@@ -332,15 +336,42 @@ static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbo
332336 return false;
333337}
334338
335- static inline bool process_frontmatter (TSLexer * lexer ) {
336- uint8_t opening = 0 ;
339+ static inline bool process_frontmatter_start (TSLexer * lexer , Scanner * scanner ) {
340+ uint8_t amount = 0 ;
337341 while (lexer -> lookahead == '-' ) {
338- opening ++ ;
342+ amount ++ ;
339343 advance (lexer );
340344 }
341345
342- if (opening < 3 ) {
346+ if (amount < 3 ) {
343347 return false;
348+ } else {
349+ scanner -> frontmatter_dashes = amount ;
350+ lexer -> result_symbol = FRONTMATTER_START ;
351+
352+ // parse optional info string after the initial fence
353+ while (lexer -> lookahead != '\n' && !lexer -> eof (lexer )) {
354+ advance (lexer );
355+ }
356+ advance (lexer );
357+
358+ return true;
359+ }
360+ }
361+
362+ static inline bool process_frontmatter (TSLexer * lexer , Scanner * scanner ) {
363+ // separately parse empty frontmatter, as tree-sitter strips all whitespace,
364+ // including newlines, so I can't rely on parsing only after a newline in this case.
365+ lexer -> mark_end (lexer );
366+ uint8_t amount = 0 ;
367+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
368+ amount ++ ;
369+ advance (lexer );
370+ }
371+
372+ if (amount == scanner -> frontmatter_dashes ) {
373+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
374+ return true;
344375 }
345376
346377 for (;;) {
@@ -349,16 +380,17 @@ static inline bool process_frontmatter(TSLexer *lexer) {
349380 }
350381
351382 if (lexer -> lookahead == '\n' ) {
383+ lexer -> mark_end (lexer );
352384 advance (lexer );
353385
354386 uint8_t amount = 0 ;
355- while (lexer -> lookahead == '-' && amount < opening ) {
387+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
356388 amount ++ ;
357389 advance (lexer );
358390 }
359391
360- if (amount == opening ) {
361- lexer -> result_symbol = FRONTMATTER ;
392+ if (amount == scanner -> frontmatter_dashes ) {
393+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
362394 return true;
363395 }
364396 } else {
@@ -367,6 +399,16 @@ static inline bool process_frontmatter(TSLexer *lexer) {
367399 }
368400}
369401
402+ static inline bool process_frontmatter_end (TSLexer * lexer , Scanner * scanner ) {
403+ advance (lexer );
404+ for (unsigned int amount = 0 ; amount < scanner -> frontmatter_dashes ; amount ++ ) {
405+ advance (lexer );
406+ }
407+
408+ lexer -> result_symbol = FRONTMATTER_END ;
409+ return true;
410+ }
411+
370412bool tree_sitter_rust_external_scanner_scan (void * payload , TSLexer * lexer , const bool * valid_symbols ) {
371413 // The documentation states that if the lexical analysis fails for some reason
372414 // they will mark every state as valid and pass it to the external scanner
@@ -425,8 +467,16 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
425467 return process_float_literal (lexer );
426468 }
427469
428- if (valid_symbols [FRONTMATTER ]) {
429- return process_frontmatter (lexer );
470+ if (valid_symbols [FRONTMATTER_START ]) {
471+ return process_frontmatter_start (lexer , scanner );
472+ }
473+
474+ if (valid_symbols [FRONTMATTER_CONTENT ]) {
475+ return process_frontmatter (lexer , scanner );
476+ }
477+
478+ if (valid_symbols [FRONTMATTER_END ]) {
479+ return process_frontmatter_end (lexer , scanner );
430480 }
431481
432482 return false;
0 commit comments