Skip to content

Commit c90aa18

Browse files
committed
parse (frontmatter_content)
1 parent 58df600 commit c90aa18

File tree

2 files changed

+73
-14
lines changed

2 files changed

+73
-14
lines changed

grammar.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ module.exports = grammar({
7676
$._inner_block_doc_comment_marker,
7777
$._block_comment_content,
7878
$._line_doc_content,
79-
$.frontmatter,
79+
$._frontmatter_start,
80+
$.frontmatter_content,
81+
$._frontmatter_end,
8082
$._error_sentinel,
8183
],
8284

@@ -1650,6 +1652,12 @@ module.exports = grammar({
16501652

16511653
shebang: _ => /#![\r\f\t\v ]*([^\[\n].*)?\n/,
16521654

1655+
frontmatter: $ => seq(
1656+
$._frontmatter_start,
1657+
$.frontmatter_content,
1658+
$._frontmatter_end,
1659+
),
1660+
16531661
_reserved_identifier: $ => alias(choice(
16541662
'default',
16551663
'union',

src/scanner.c

Lines changed: 64 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@ enum TokenType {
1313
BLOCK_INNER_DOC_MARKER,
1414
BLOCK_COMMENT_CONTENT,
1515
LINE_DOC_CONTENT,
16-
FRONTMATTER,
16+
FRONTMATTER_START,
17+
FRONTMATTER_CONTENT,
18+
FRONTMATTER_END,
1719
ERROR_SENTINEL
1820
};
1921

2022
typedef struct {
2123
uint8_t opening_hash_count;
24+
uint8_t frontmatter_dashes;
2225
} Scanner;
2326

2427
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }
@@ -28,15 +31,17 @@ void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner
2831
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
2932
Scanner *scanner = (Scanner *)payload;
3033
buffer[0] = (char)scanner->opening_hash_count;
31-
return 1;
34+
buffer[1] = (char)scanner->frontmatter_dashes;
35+
return 2;
3236
}
3337

3438
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
3539
Scanner *scanner = (Scanner *)payload;
3640
scanner->opening_hash_count = 0;
37-
if (length == 1) {
38-
Scanner *scanner = (Scanner *)payload;
41+
scanner->frontmatter_dashes = 0;
42+
if (length == 2) {
3943
scanner->opening_hash_count = buffer[0];
44+
scanner->frontmatter_dashes = buffer[1];
4045
}
4146
}
4247

@@ -332,15 +337,42 @@ static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbo
332337
return false;
333338
}
334339

335-
static inline bool process_frontmatter(TSLexer *lexer) {
336-
uint8_t opening = 0;
340+
static inline bool process_frontmatter_start(TSLexer *lexer, Scanner *scanner) {
341+
uint8_t amount = 0;
337342
while (lexer->lookahead == '-') {
338-
opening++;
343+
amount++;
339344
advance(lexer);
340345
}
341346

342-
if (opening < 3) {
347+
if (amount < 3) {
343348
return false;
349+
} else {
350+
scanner->frontmatter_dashes = amount;
351+
lexer->result_symbol = FRONTMATTER_START;
352+
353+
// parse optional info string after the initial fence
354+
while (lexer->lookahead != '\n' && !lexer->eof(lexer)) {
355+
advance(lexer);
356+
}
357+
advance(lexer);
358+
359+
return true;
360+
}
361+
}
362+
363+
static inline bool process_frontmatter(TSLexer *lexer, Scanner *scanner) {
364+
// seperately parse empty frontmatter, as tree-sitter strips all whitespace,
365+
// including newlines, so i can't rely on parsing only after a newline in this case.
366+
lexer->mark_end(lexer);
367+
uint8_t amount = 0;
368+
while (lexer->lookahead == '-' && amount < scanner->frontmatter_dashes) {
369+
amount++;
370+
advance(lexer);
371+
}
372+
373+
if (amount == scanner->frontmatter_dashes) {
374+
lexer->result_symbol = FRONTMATTER_CONTENT;
375+
return true;
344376
}
345377

346378
for (;;) {
@@ -349,16 +381,17 @@ static inline bool process_frontmatter(TSLexer *lexer) {
349381
}
350382

351383
if (lexer->lookahead == '\n') {
384+
lexer->mark_end(lexer);
352385
advance(lexer);
353386

354387
uint8_t amount = 0;
355-
while (lexer->lookahead == '-' && amount < opening) {
388+
while (lexer->lookahead == '-' && amount < scanner->frontmatter_dashes) {
356389
amount++;
357390
advance(lexer);
358391
}
359392

360-
if (amount == opening) {
361-
lexer->result_symbol = FRONTMATTER;
393+
if (amount == scanner->frontmatter_dashes) {
394+
lexer->result_symbol = FRONTMATTER_CONTENT;
362395
return true;
363396
}
364397
} else {
@@ -367,6 +400,16 @@ static inline bool process_frontmatter(TSLexer *lexer) {
367400
}
368401
}
369402

403+
static inline bool process_frontmatter_end(TSLexer *lexer, Scanner *scanner) {
404+
advance(lexer);
405+
for (unsigned int amount = 0; amount < scanner->frontmatter_dashes; amount++) {
406+
advance(lexer);
407+
}
408+
409+
lexer->result_symbol = FRONTMATTER_END;
410+
return true;
411+
}
412+
370413
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
371414
// The documentation states that if the lexical analysis fails for some reason
372415
// they will mark every state as valid and pass it to the external scanner
@@ -425,8 +468,16 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
425468
return process_float_literal(lexer);
426469
}
427470

428-
if (valid_symbols[FRONTMATTER]) {
429-
return process_frontmatter(lexer);
471+
if (valid_symbols[FRONTMATTER_START]) {
472+
return process_frontmatter_start(lexer, scanner);
473+
}
474+
475+
if (valid_symbols[FRONTMATTER_CONTENT]) {
476+
return process_frontmatter(lexer, scanner);
477+
}
478+
479+
if (valid_symbols[FRONTMATTER_END]) {
480+
return process_frontmatter_end(lexer, scanner);
430481
}
431482

432483
return false;

0 commit comments

Comments
 (0)