diff --git a/grammar.js b/grammar.js index 657b78d..1436e17 100644 --- a/grammar.js +++ b/grammar.js @@ -124,7 +124,7 @@ module.exports = grammar({ attribute_name: _ => /[^<>"'/=\s]+/, - attribute_value: _ => /[^<>"'=\s]+/, + attribute_value: _ => /[^<>"'=\s][^>\s]*/, // An entity can be named, numeric (decimal), or numeric (hexacecimal). The // longest entity name is 29 characters long, and the HTML spec says that diff --git a/src/grammar.json b/src/grammar.json index be7e717..c36bc07 100644 --- a/src/grammar.json +++ b/src/grammar.json @@ -370,7 +370,7 @@ }, "attribute_value": { "type": "PATTERN", - "value": "[^<>\"'=\\s]+" + "value": "[^<>\"'=\\s][^>\\s]*" }, "entity": { "type": "PATTERN", @@ -497,5 +497,6 @@ } ], "inline": [], - "supertypes": [] -} + "supertypes": [], + "reserved": {} +} \ No newline at end of file diff --git a/src/node-types.json b/src/node-types.json index 384ae87..3066ed1 100644 --- a/src/node-types.json +++ b/src/node-types.json @@ -289,7 +289,8 @@ }, { "type": "comment", - "named": true + "named": true, + "extra": true }, { "type": "doctype", diff --git a/src/parser.c b/src/parser.c index a342e0c..ce69461 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,10 +1,12 @@ +/* Automatically @generated by tree-sitter v0.25.6 */ + #include "tree_sitter/parser.h" #if defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif -#define LANGUAGE_VERSION 14 +#define LANGUAGE_VERSION 15 #define STATE_COUNT 94 #define LARGE_STATE_COUNT 2 #define SYMBOL_COUNT 41 @@ -13,7 +15,9 @@ #define EXTERNAL_TOKEN_COUNT 9 #define FIELD_COUNT 0 #define MAX_ALIAS_SEQUENCE_LENGTH 4 +#define MAX_RESERVED_WORD_SET_SIZE 0 #define PRODUCTION_ID_COUNT 1 +#define SUPERTYPE_COUNT 0 enum ts_symbol_identifiers { anon_sym_LT_BANG = 1, @@ -587,9 +591,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { if (lookahead != 0 && (lookahead < '\t' || '\r' < lookahead) && lookahead != ' ' && - lookahead != '"' && - lookahead != '\'' && - (lookahead < '<' || '>' < lookahead)) ADVANCE(29); + lookahead != '>') ADVANCE(29); END_STATE(); case 30: ACCEPT_TOKEN(sym_entity); @@ -871,7 +873,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { } } -static const TSLexMode ts_lex_modes[STATE_COUNT] = { +static const TSLexerMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0, .external_lex_state = 1}, [1] = {.lex_state = 17, .external_lex_state = 2}, [2] = {.lex_state = 17, .external_lex_state = 3}, @@ -969,7 +971,7 @@ static const TSLexMode ts_lex_modes[STATE_COUNT] = { }; static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { - [0] = { + [STATE(0)] = { [ts_builtin_sym_end] = ACTIONS(1), [anon_sym_LT_BANG] = ACTIONS(1), [anon_sym_GT] = ACTIONS(1), @@ -990,7 +992,7 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [sym_raw_text] = ACTIONS(1), [sym_comment] = ACTIONS(3), }, - [1] = { + [STATE(1)] = { [sym_document] = STATE(83), [sym_doctype] = STATE(7), [sym__node] = STATE(7), @@ -2261,7 +2263,7 @@ void tree_sitter_html_external_scanner_deserialize(void *, const char *, unsigne TS_PUBLIC const TSLanguage *tree_sitter_html(void) { static const TSLanguage language = { - .version = LANGUAGE_VERSION, + .abi_version = LANGUAGE_VERSION, .symbol_count = SYMBOL_COUNT, .alias_count = ALIAS_COUNT, .token_count = TOKEN_COUNT, @@ -2269,6 +2271,7 @@ TS_PUBLIC const TSLanguage *tree_sitter_html(void) { .state_count = STATE_COUNT, .large_state_count = LARGE_STATE_COUNT, .production_id_count = PRODUCTION_ID_COUNT, + .supertype_count = SUPERTYPE_COUNT, .field_count = FIELD_COUNT, .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, .parse_table = &ts_parse_table[0][0], @@ -2280,7 +2283,7 @@ TS_PUBLIC const TSLanguage *tree_sitter_html(void) { .public_symbol_map = ts_symbol_map, .alias_map = ts_non_terminal_alias_map, .alias_sequences = &ts_alias_sequences[0][0], - .lex_modes = ts_lex_modes, + .lex_modes = (const void*)ts_lex_modes, .lex_fn = ts_lex, .external_scanner = { &ts_external_scanner_states[0][0], @@ -2292,6 +2295,13 @@ TS_PUBLIC const TSLanguage *tree_sitter_html(void) { tree_sitter_html_external_scanner_deserialize, }, .primary_state_ids = ts_primary_state_ids, + .name = "html", + .max_reserved_word_set_size = 0, + .metadata = { + .major_version = 0, + .minor_version = 23, + .patch_version = 2, + }, }; return &language; } diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h index 15a3b23..a17a574 100644 --- a/src/tree_sitter/array.h +++ b/src/tree_sitter/array.h @@ -14,6 +14,7 @@ extern "C" { #include #ifdef _MSC_VER +#pragma warning(push) #pragma warning(disable : 4101) #elif defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic push @@ -278,7 +279,7 @@ static inline void _array__splice(Array *self, size_t element_size, #define _compare_int(a, b) ((int)*(a) - (int)(b)) #ifdef _MSC_VER -#pragma warning(default : 4101) +#pragma warning(pop) #elif defined(__GNUC__) || defined(__clang__) #pragma GCC diagnostic pop #endif diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h index 799f599..858107d 100644 --- a/src/tree_sitter/parser.h +++ b/src/tree_sitter/parser.h @@ -18,6 +18,11 @@ typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; +typedef struct TSLanguageMetadata { + uint8_t major_version; + uint8_t minor_version; + uint8_t patch_version; +} TSLanguageMetadata; #endif typedef struct { @@ -26,10 +31,11 @@ typedef struct { bool inherited; } TSFieldMapEntry; +// Used to index the field and supertype maps. typedef struct { uint16_t index; uint16_t length; -} TSFieldMapSlice; +} TSMapSlice; typedef struct { bool visible; @@ -79,6 +85,12 @@ typedef struct { uint16_t external_lex_state; } TSLexMode; +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; + uint16_t reserved_word_set_id; +} TSLexerMode; + typedef union { TSParseAction action; struct { @@ -93,7 +105,7 @@ typedef struct { } TSCharacterRange; struct TSLanguage { - uint32_t version; + uint32_t abi_version; uint32_t symbol_count; uint32_t alias_count; uint32_t token_count; @@ -109,13 +121,13 @@ struct TSLanguage { const TSParseActionEntry *parse_actions; const char * const *symbol_names; const char * const *field_names; - const TSFieldMapSlice *field_map_slices; + const TSMapSlice *field_map_slices; const TSFieldMapEntry *field_map_entries; const TSSymbolMetadata *symbol_metadata; const TSSymbol *public_symbol_map; const uint16_t *alias_map; const TSSymbol *alias_sequences; - const TSLexMode *lex_modes; + const TSLexerMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; @@ -129,15 +141,23 @@ struct TSLanguage { void (*deserialize)(void *, const char *, unsigned); } external_scanner; const TSStateId *primary_state_ids; + const char *name; + const TSSymbol *reserved_words; + uint16_t max_reserved_word_set_size; + uint32_t supertype_count; + const TSSymbol *supertype_symbols; + const TSMapSlice *supertype_map_slices; + const TSSymbol *supertype_map_entries; + TSLanguageMetadata metadata; }; -static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { +static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { uint32_t index = 0; uint32_t size = len - index; while (size > 1) { uint32_t half_size = size / 2; uint32_t mid_index = index + half_size; - TSCharacterRange *range = &ranges[mid_index]; + const TSCharacterRange *range = &ranges[mid_index]; if (lookahead >= range->start && lookahead <= range->end) { return true; } else if (lookahead > range->end) { @@ -145,7 +165,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t } size -= half_size; } - TSCharacterRange *range = &ranges[index]; + const TSCharacterRange *range = &ranges[index]; return (lookahead >= range->start && lookahead <= range->end); }