Skip to content

Commit 94085df

Browse files
committed
fix: mark helper functions static, do not skip leading whitespace
1 parent e5d7d7d commit 94085df

File tree

4 files changed

+102
-130
lines changed

4 files changed

+102
-130
lines changed

src/parser.c

Lines changed: 88 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include <tree_sitter/parser.h>
1+
#include "tree_sitter/parser.h"
22

33
#if defined(__GNUC__) || defined(__clang__)
44
#pragma GCC diagnostic push
@@ -16,7 +16,7 @@
1616
#define MAX_ALIAS_SEQUENCE_LENGTH 4
1717
#define PRODUCTION_ID_COUNT 1
1818

19-
enum {
19+
enum ts_symbol_identifiers {
2020
anon_sym_LT_BANG = 1,
2121
aux_sym_doctype_token1 = 2,
2222
anon_sym_GT = 3,
@@ -421,7 +421,6 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
421421

422422
static bool ts_lex(TSLexer *lexer, TSStateId state) {
423423
START_LEXER();
424-
eof = lexer->eof(lexer);
425424
switch (state) {
426425
case 0:
427426
if (eof) ADVANCE(57);
@@ -434,26 +433,20 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
434433
if (lookahead == '>') ADVANCE(61);
435434
if (lookahead == 'D' ||
436435
lookahead == 'd') ADVANCE(48);
437-
if (lookahead == '\t' ||
438-
lookahead == '\n' ||
439-
lookahead == '\r' ||
436+
if (('\t' <= lookahead && lookahead <= '\r') ||
440437
lookahead == ' ') SKIP(0)
441438
END_STATE();
442439
case 1:
443440
if (lookahead == '"') ADVANCE(73);
444441
if (lookahead == '\'') ADVANCE(70);
445-
if (lookahead == '\t' ||
446-
lookahead == '\n' ||
447-
lookahead == '\r' ||
442+
if (('\t' <= lookahead && lookahead <= '\r') ||
448443
lookahead == ' ') SKIP(1)
449444
if (lookahead != 0 &&
450445
(lookahead < '<' || '>' < lookahead)) ADVANCE(68);
451446
END_STATE();
452447
case 2:
453448
if (lookahead == '"') ADVANCE(73);
454-
if (lookahead == '\t' ||
455-
lookahead == '\n' ||
456-
lookahead == '\r' ||
449+
if (('\t' <= lookahead && lookahead <= '\r') ||
457450
lookahead == ' ') ADVANCE(74);
458451
if (lookahead != 0) ADVANCE(75);
459452
END_STATE();
@@ -464,19 +457,15 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
464457
END_STATE();
465458
case 4:
466459
if (lookahead == '\'') ADVANCE(70);
467-
if (lookahead == '\t' ||
468-
lookahead == '\n' ||
469-
lookahead == '\r' ||
460+
if (('\t' <= lookahead && lookahead <= '\r') ||
470461
lookahead == ' ') ADVANCE(71);
471462
if (lookahead != 0) ADVANCE(72);
472463
END_STATE();
473464
case 5:
474465
if (lookahead == '/') ADVANCE(45);
475466
if (lookahead == '=') ADVANCE(66);
476467
if (lookahead == '>') ADVANCE(61);
477-
if (lookahead == '\t' ||
478-
lookahead == '\n' ||
479-
lookahead == '\r' ||
468+
if (('\t' <= lookahead && lookahead <= '\r') ||
480469
lookahead == ' ') SKIP(5)
481470
if (lookahead != 0 &&
482471
lookahead != '"' &&
@@ -710,19 +699,15 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
710699
lookahead == 'y') ADVANCE(49);
711700
END_STATE();
712701
case 53:
713-
if (lookahead == '\t' ||
714-
lookahead == '\n' ||
715-
lookahead == '\r' ||
702+
if (('\t' <= lookahead && lookahead <= '\r') ||
716703
lookahead == ' ') ADVANCE(53);
717704
if (lookahead != 0 &&
718705
lookahead != '&' &&
719706
lookahead != '<' &&
720707
lookahead != '>') ADVANCE(76);
721708
END_STATE();
722709
case 54:
723-
if (lookahead == '\t' ||
724-
lookahead == '\n' ||
725-
lookahead == '\r' ||
710+
if (('\t' <= lookahead && lookahead <= '\r') ||
726711
lookahead == ' ') ADVANCE(59);
727712
if (lookahead != 0 &&
728713
lookahead != '>') ADVANCE(60);
@@ -736,9 +721,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
736721
if (eof) ADVANCE(57);
737722
if (lookahead == '&') ADVANCE(3);
738723
if (lookahead == '<') ADVANCE(63);
739-
if (lookahead == '\t' ||
740-
lookahead == '\n' ||
741-
lookahead == '\r' ||
724+
if (('\t' <= lookahead && lookahead <= '\r') ||
742725
lookahead == ' ') SKIP(56)
743726
if (lookahead != 0 &&
744727
lookahead != '>') ADVANCE(76);
@@ -751,9 +734,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
751734
END_STATE();
752735
case 59:
753736
ACCEPT_TOKEN(aux_sym_doctype_token1);
754-
if (lookahead == '\t' ||
755-
lookahead == '\n' ||
756-
lookahead == '\r' ||
737+
if (('\t' <= lookahead && lookahead <= '\r') ||
757738
lookahead == ' ') ADVANCE(59);
758739
if (lookahead != 0 &&
759740
lookahead != '>') ADVANCE(60);
@@ -786,9 +767,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
786767
case 67:
787768
ACCEPT_TOKEN(sym_attribute_name);
788769
if (lookahead != 0 &&
789-
lookahead != '\t' &&
790-
lookahead != '\n' &&
791-
lookahead != '\r' &&
770+
(lookahead < '\t' || '\r' < lookahead) &&
792771
lookahead != ' ' &&
793772
lookahead != '"' &&
794773
lookahead != '\'' &&
@@ -798,9 +777,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
798777
case 68:
799778
ACCEPT_TOKEN(sym_attribute_value);
800779
if (lookahead != 0 &&
801-
lookahead != '\t' &&
802-
lookahead != '\n' &&
803-
lookahead != '\r' &&
780+
(lookahead < '\t' || '\r' < lookahead) &&
804781
lookahead != ' ' &&
805782
lookahead != '"' &&
806783
lookahead != '\'' &&
@@ -814,9 +791,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
814791
END_STATE();
815792
case 71:
816793
ACCEPT_TOKEN(aux_sym_quoted_attribute_value_token1);
817-
if (lookahead == '\t' ||
818-
lookahead == '\n' ||
819-
lookahead == '\r' ||
794+
if (('\t' <= lookahead && lookahead <= '\r') ||
820795
lookahead == ' ') ADVANCE(71);
821796
if (lookahead != 0 &&
822797
lookahead != '\'') ADVANCE(72);
@@ -831,9 +806,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
831806
END_STATE();
832807
case 74:
833808
ACCEPT_TOKEN(aux_sym_quoted_attribute_value_token2);
834-
if (lookahead == '\t' ||
835-
lookahead == '\n' ||
836-
lookahead == '\r' ||
809+
if (('\t' <= lookahead && lookahead <= '\r') ||
837810
lookahead == ' ') ADVANCE(74);
838811
if (lookahead != 0 &&
839812
lookahead != '"') ADVANCE(75);
@@ -845,9 +818,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
845818
END_STATE();
846819
case 76:
847820
ACCEPT_TOKEN(sym_text);
848-
if (lookahead == '\t' ||
849-
lookahead == '\n' ||
850-
lookahead == '\r' ||
821+
if (('\t' <= lookahead && lookahead <= '\r') ||
851822
lookahead == ' ') ADVANCE(53);
852823
if (lookahead != 0 &&
853824
lookahead != '&' &&
@@ -956,78 +927,6 @@ static const TSLexMode ts_lex_modes[STATE_COUNT] = {
956927
[93] = {.lex_state = 0, .external_lex_state = 2},
957928
};
958929

959-
enum {
960-
ts_external_token__start_tag_name = 0,
961-
ts_external_token__script_start_tag_name = 1,
962-
ts_external_token__style_start_tag_name = 2,
963-
ts_external_token__end_tag_name = 3,
964-
ts_external_token_erroneous_end_tag_name = 4,
965-
ts_external_token_SLASH_GT = 5,
966-
ts_external_token__implicit_end_tag = 6,
967-
ts_external_token_raw_text = 7,
968-
ts_external_token_comment = 8,
969-
};
970-
971-
static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {
972-
[ts_external_token__start_tag_name] = sym__start_tag_name,
973-
[ts_external_token__script_start_tag_name] = sym__script_start_tag_name,
974-
[ts_external_token__style_start_tag_name] = sym__style_start_tag_name,
975-
[ts_external_token__end_tag_name] = sym__end_tag_name,
976-
[ts_external_token_erroneous_end_tag_name] = sym_erroneous_end_tag_name,
977-
[ts_external_token_SLASH_GT] = anon_sym_SLASH_GT,
978-
[ts_external_token__implicit_end_tag] = sym__implicit_end_tag,
979-
[ts_external_token_raw_text] = sym_raw_text,
980-
[ts_external_token_comment] = sym_comment,
981-
};
982-
983-
static const bool ts_external_scanner_states[10][EXTERNAL_TOKEN_COUNT] = {
984-
[1] = {
985-
[ts_external_token__start_tag_name] = true,
986-
[ts_external_token__script_start_tag_name] = true,
987-
[ts_external_token__style_start_tag_name] = true,
988-
[ts_external_token__end_tag_name] = true,
989-
[ts_external_token_erroneous_end_tag_name] = true,
990-
[ts_external_token_SLASH_GT] = true,
991-
[ts_external_token__implicit_end_tag] = true,
992-
[ts_external_token_raw_text] = true,
993-
[ts_external_token_comment] = true,
994-
},
995-
[2] = {
996-
[ts_external_token_comment] = true,
997-
},
998-
[3] = {
999-
[ts_external_token__implicit_end_tag] = true,
1000-
[ts_external_token_comment] = true,
1001-
},
1002-
[4] = {
1003-
[ts_external_token_SLASH_GT] = true,
1004-
[ts_external_token_comment] = true,
1005-
},
1006-
[5] = {
1007-
[ts_external_token__start_tag_name] = true,
1008-
[ts_external_token__script_start_tag_name] = true,
1009-
[ts_external_token__style_start_tag_name] = true,
1010-
[ts_external_token_comment] = true,
1011-
},
1012-
[6] = {
1013-
[ts_external_token_raw_text] = true,
1014-
[ts_external_token_comment] = true,
1015-
},
1016-
[7] = {
1017-
[ts_external_token__end_tag_name] = true,
1018-
[ts_external_token_erroneous_end_tag_name] = true,
1019-
[ts_external_token_comment] = true,
1020-
},
1021-
[8] = {
1022-
[ts_external_token_erroneous_end_tag_name] = true,
1023-
[ts_external_token_comment] = true,
1024-
},
1025-
[9] = {
1026-
[ts_external_token__end_tag_name] = true,
1027-
[ts_external_token_comment] = true,
1028-
},
1029-
};
1030-
1031930
static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {
1032931
[0] = {
1033932
[ts_builtin_sym_end] = ACTIONS(1),
@@ -2230,6 +2129,78 @@ static const TSParseActionEntry ts_parse_actions[] = {
22302129
[251] = {.entry = {.count = 1, .reusable = true}}, SHIFT(90),
22312130
};
22322131

2132+
enum ts_external_scanner_symbol_identifiers {
2133+
ts_external_token__start_tag_name = 0,
2134+
ts_external_token__script_start_tag_name = 1,
2135+
ts_external_token__style_start_tag_name = 2,
2136+
ts_external_token__end_tag_name = 3,
2137+
ts_external_token_erroneous_end_tag_name = 4,
2138+
ts_external_token_SLASH_GT = 5,
2139+
ts_external_token__implicit_end_tag = 6,
2140+
ts_external_token_raw_text = 7,
2141+
ts_external_token_comment = 8,
2142+
};
2143+
2144+
static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {
2145+
[ts_external_token__start_tag_name] = sym__start_tag_name,
2146+
[ts_external_token__script_start_tag_name] = sym__script_start_tag_name,
2147+
[ts_external_token__style_start_tag_name] = sym__style_start_tag_name,
2148+
[ts_external_token__end_tag_name] = sym__end_tag_name,
2149+
[ts_external_token_erroneous_end_tag_name] = sym_erroneous_end_tag_name,
2150+
[ts_external_token_SLASH_GT] = anon_sym_SLASH_GT,
2151+
[ts_external_token__implicit_end_tag] = sym__implicit_end_tag,
2152+
[ts_external_token_raw_text] = sym_raw_text,
2153+
[ts_external_token_comment] = sym_comment,
2154+
};
2155+
2156+
static const bool ts_external_scanner_states[10][EXTERNAL_TOKEN_COUNT] = {
2157+
[1] = {
2158+
[ts_external_token__start_tag_name] = true,
2159+
[ts_external_token__script_start_tag_name] = true,
2160+
[ts_external_token__style_start_tag_name] = true,
2161+
[ts_external_token__end_tag_name] = true,
2162+
[ts_external_token_erroneous_end_tag_name] = true,
2163+
[ts_external_token_SLASH_GT] = true,
2164+
[ts_external_token__implicit_end_tag] = true,
2165+
[ts_external_token_raw_text] = true,
2166+
[ts_external_token_comment] = true,
2167+
},
2168+
[2] = {
2169+
[ts_external_token_comment] = true,
2170+
},
2171+
[3] = {
2172+
[ts_external_token__implicit_end_tag] = true,
2173+
[ts_external_token_comment] = true,
2174+
},
2175+
[4] = {
2176+
[ts_external_token_SLASH_GT] = true,
2177+
[ts_external_token_comment] = true,
2178+
},
2179+
[5] = {
2180+
[ts_external_token__start_tag_name] = true,
2181+
[ts_external_token__script_start_tag_name] = true,
2182+
[ts_external_token__style_start_tag_name] = true,
2183+
[ts_external_token_comment] = true,
2184+
},
2185+
[6] = {
2186+
[ts_external_token_raw_text] = true,
2187+
[ts_external_token_comment] = true,
2188+
},
2189+
[7] = {
2190+
[ts_external_token__end_tag_name] = true,
2191+
[ts_external_token_erroneous_end_tag_name] = true,
2192+
[ts_external_token_comment] = true,
2193+
},
2194+
[8] = {
2195+
[ts_external_token_erroneous_end_tag_name] = true,
2196+
[ts_external_token_comment] = true,
2197+
},
2198+
[9] = {
2199+
[ts_external_token__end_tag_name] = true,
2200+
[ts_external_token_comment] = true,
2201+
},
2202+
};
2203+
22332204
#ifdef __cplusplus
22342205
extern "C" {
22352206
#endif

src/scanner.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,15 +368,15 @@ static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) {
368368
}
369369

370370
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
371-
while (iswspace(lexer->lookahead)) {
372-
lexer->advance(lexer, true);
373-
}
374-
375371
if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] &&
376372
!valid_symbols[END_TAG_NAME]) {
377373
return scan_raw_text(scanner, lexer);
378374
}
379375

376+
while (iswspace(lexer->lookahead)) {
377+
lexer->advance(lexer, true);
378+
}
379+
380380
switch (lexer->lookahead) {
381381
case '<':
382382
lexer->mark_end(lexer);

src/tag.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "tree_sitter/parser.h"
2+
23
#include <assert.h>
34
#include <string.h>
45

@@ -288,7 +289,7 @@ static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = {
288289
NAV, OL, P, PRE, SECTION,
289290
};
290291

291-
TagType get_tag_from_string(const char *tag_name) {
292+
static TagType get_tag_from_string(const char *tag_name) {
292293
for (int i = 0; i < 126; i++) {
293294
if (strcmp(TAG_TYPES_BY_TAG_NAME[i].tag_name, tag_name) == 0) {
294295
return TAG_TYPES_BY_TAG_NAME[i].tag_value;
@@ -306,7 +307,7 @@ static inline Tag new_tag() {
306307
return tag;
307308
}
308309

309-
Tag make_tag(TagType type, const char *name) {
310+
static Tag make_tag(TagType type, const char *name) {
310311
Tag tag = new_tag();
311312
tag.type = type;
312313
if (type == CUSTOM) {
@@ -340,7 +341,7 @@ static inline bool tagcmp(const Tag *_tag1, const Tag *_tag2) {
340341
: true);
341342
}
342343

343-
bool can_contain(Tag *self, const Tag *other) {
344+
static bool can_contain(Tag *self, const Tag *other) {
344345
TagType child = other->type;
345346

346347
switch (self->type) {

0 commit comments

Comments
 (0)