@@ -13,12 +13,15 @@ enum TokenType {
13
13
BLOCK_INNER_DOC_MARKER ,
14
14
BLOCK_COMMENT_CONTENT ,
15
15
LINE_DOC_CONTENT ,
16
- FRONTMATTER ,
16
+ FRONTMATTER_START ,
17
+ FRONTMATTER_CONTENT ,
18
+ FRONTMATTER_END ,
17
19
ERROR_SENTINEL
18
20
};
19
21
20
22
// Scanner state that is written out by the serialize hook and read back by
// the deserialize hook, one byte per field.
typedef struct {
    // Serialized as byte 0. NOTE(review): presumably the '#' count of a raw
    // string literal; it is set outside this excerpt, confirm there.
    uint8_t opening_hash_count;

    // Serialized as byte 1. Number of '-' characters in the opening
    // frontmatter fence; the closing fence is matched against this count.
    uint8_t frontmatter_dashes;
} Scanner;
23
26
24
27
void * tree_sitter_rust_external_scanner_create () { return ts_calloc (1 , sizeof (Scanner )); }
@@ -28,15 +31,17 @@ void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner
28
31
unsigned tree_sitter_rust_external_scanner_serialize (void * payload , char * buffer ) {
29
32
Scanner * scanner = (Scanner * )payload ;
30
33
buffer [0 ] = (char )scanner -> opening_hash_count ;
31
- return 1 ;
34
+ buffer [1 ] = (char )scanner -> frontmatter_dashes ;
35
+ return 2 ;
32
36
}
33
37
34
38
void tree_sitter_rust_external_scanner_deserialize (void * payload , const char * buffer , unsigned length ) {
35
39
Scanner * scanner = (Scanner * )payload ;
36
40
scanner -> opening_hash_count = 0 ;
37
- if ( length == 1 ) {
38
- Scanner * scanner = ( Scanner * ) payload ;
41
+ scanner -> frontmatter_dashes = 0 ;
42
+ if ( length == 2 ) {
39
43
scanner -> opening_hash_count = buffer [0 ];
44
+ scanner -> frontmatter_dashes = buffer [1 ];
40
45
}
41
46
}
42
47
@@ -332,15 +337,42 @@ static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbo
332
337
return false;
333
338
}
334
339
335
- static inline bool process_frontmatter (TSLexer * lexer ) {
336
- uint8_t opening = 0 ;
340
+ static inline bool process_frontmatter_start (TSLexer * lexer , Scanner * scanner ) {
341
+ uint8_t amount = 0 ;
337
342
while (lexer -> lookahead == '-' ) {
338
- opening ++ ;
343
+ amount ++ ;
339
344
advance (lexer );
340
345
}
341
346
342
- if (opening < 3 ) {
347
+ if (amount < 3 ) {
343
348
return false;
349
+ } else {
350
+ scanner -> frontmatter_dashes = amount ;
351
+ lexer -> result_symbol = FRONTMATTER_START ;
352
+
353
+ // parse optional info string after the initial fence
354
+ while (lexer -> lookahead != '\n' && !lexer -> eof (lexer )) {
355
+ advance (lexer );
356
+ }
357
+ advance (lexer );
358
+
359
+ return true;
360
+ }
361
+ }
362
+
363
+ static inline bool process_frontmatter (TSLexer * lexer , Scanner * scanner ) {
364
+ // separately parse empty frontmatter, as tree-sitter strips all whitespace,
365
+ // including newlines, so I can't rely on parsing only after a newline in this case.
366
+ lexer -> mark_end (lexer );
367
+ uint8_t amount = 0 ;
368
+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
369
+ amount ++ ;
370
+ advance (lexer );
371
+ }
372
+
373
+ if (amount == scanner -> frontmatter_dashes ) {
374
+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
375
+ return true;
344
376
}
345
377
346
378
for (;;) {
@@ -349,16 +381,17 @@ static inline bool process_frontmatter(TSLexer *lexer) {
349
381
}
350
382
351
383
if (lexer -> lookahead == '\n' ) {
384
+ lexer -> mark_end (lexer );
352
385
advance (lexer );
353
386
354
387
uint8_t amount = 0 ;
355
- while (lexer -> lookahead == '-' && amount < opening ) {
388
+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
356
389
amount ++ ;
357
390
advance (lexer );
358
391
}
359
392
360
- if (amount == opening ) {
361
- lexer -> result_symbol = FRONTMATTER ;
393
+ if (amount == scanner -> frontmatter_dashes ) {
394
+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
362
395
return true;
363
396
}
364
397
} else {
@@ -367,6 +400,16 @@ static inline bool process_frontmatter(TSLexer *lexer) {
367
400
}
368
401
}
369
402
403
+ static inline bool process_frontmatter_end (TSLexer * lexer , Scanner * scanner ) {
404
+ advance (lexer );
405
+ for (unsigned int amount = 0 ; amount < scanner -> frontmatter_dashes ; amount ++ ) {
406
+ advance (lexer );
407
+ }
408
+
409
+ lexer -> result_symbol = FRONTMATTER_END ;
410
+ return true;
411
+ }
412
+
370
413
bool tree_sitter_rust_external_scanner_scan (void * payload , TSLexer * lexer , const bool * valid_symbols ) {
371
414
// The documentation states that if the lexical analysis fails for some reason
372
415
// they will mark every state as valid and pass it to the external scanner
@@ -425,8 +468,16 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
425
468
return process_float_literal (lexer );
426
469
}
427
470
428
- if (valid_symbols [FRONTMATTER ]) {
429
- return process_frontmatter (lexer );
471
+ if (valid_symbols [FRONTMATTER_START ]) {
472
+ return process_frontmatter_start (lexer , scanner );
473
+ }
474
+
475
+ if (valid_symbols [FRONTMATTER_CONTENT ]) {
476
+ return process_frontmatter (lexer , scanner );
477
+ }
478
+
479
+ if (valid_symbols [FRONTMATTER_END ]) {
480
+ return process_frontmatter_end (lexer , scanner );
430
481
}
431
482
432
483
return false;
0 commit comments