Skip to content

Commit ec12b2f

Browse files
Merge pull request #906 from savetheclocktower/tree-sitter-february
Tree-sitter rolling fixes (February)
2 parents 114a724 + 9b1e8e5 commit ec12b2f

File tree

15 files changed

+466
-83
lines changed

15 files changed

+466
-83
lines changed

.eslintignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
*.ts
2+
vendor

packages/language-c/grammars/tree-sitter-c/highlights.scm

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
; PREPROCESSOR
23
; ============
34

@@ -16,21 +17,48 @@
1617
(["#if" "#ifdef" "#ifndef" "#endif" "#elif" "#else" "#define" "#include"] @punctuation.definition.directive.c
1718
(#set! adjust.endAfterFirstMatchOf "^#"))
1819

19-
20-
; This will match if the more specific rules above haven't matched. The
21-
; anonymous nodes will match under ideal conditions, but might not be present
22-
; if the parser is flummoxed.
20+
; `preproc_directive` will be used when the parser doesn't recognize the
21+
; directive as one of the above. It's permissive; `#afdfafsdfdfad` would be
22+
; parsed as a `preproc_directive`.
23+
;
24+
; Hence this rule will match if the more specific rules above haven't matched.
25+
; The anonymous nodes will match under ideal conditions, but might not be
26+
; present even when they ought to be _if_ the parser is flummoxed; so this'll
27+
; sometimes catch `#ifdef` and others.
2328
((preproc_directive) @keyword.control.directive.c
2429
(#set! capture.shy true))
2530

26-
((preproc_ifdef
27-
(identifier) @entity.name.function.preprocessor.c
28-
(#match? @entity.name.function.preprocessor.c "[a-zA-Z_$][\\w$]*")))
31+
((preproc_directive) @punctuation.definition.directive.c
32+
(#set! capture.shy true)
33+
(#set! adjust.endAfterFirstMatchOf "^#"))
2934

35+
; Macro functions are definitely entities.
3036
(preproc_function_def
3137
(identifier) @entity.name.function.preprocessor.c
3238
(#set! capture.final true))
3339

40+
; Identifiers in macro definitions are definitely constants.
41+
((preproc_def
42+
name: (identifier) @constant.preprocessor.c))
43+
44+
; We can also safely treat identifiers as constants in `#ifdef`…
45+
((preproc_ifdef
46+
(identifier) @constant.preprocessor.c))
47+
48+
; …and `#if` and `#elif`…
49+
(preproc_if
50+
(binary_expression
51+
(identifier) @constant.preprocessor.c))
52+
(preproc_elif
53+
(binary_expression
54+
(identifier) @constant.preprocessor.c))
55+
56+
; …and `#undef`.
57+
((preproc_call
58+
directive: (preproc_directive) @_IGNORE_
59+
argument: (preproc_arg) @constant.preprocessor.c)
60+
(#eq? @_IGNORE_ "#undef"))
61+
3462
(system_lib_string) @string.quoted.other.lt-gt.include.c
3563
((system_lib_string) @punctuation.definition.string.begin.c
3664
(#set! adjust.endAfterFirstMatchOf "^<"))
@@ -48,6 +76,15 @@
4876
(#set! capture.final true))
4977

5078
(primitive_type) @support.storage.type.builtin.c
79+
80+
; When the user has typed `#define FOO`, the macro injection thinks that `FOO`
81+
; is a type declaration (for some reason). This node structure seems to exist
82+
; only in that unusual and incorrect scenario, so we'll stop it from happening
83+
; so that it doesn't override the underlying `constant.other.c` scope.
84+
(translation_unit
85+
(type_identifier) @_IGNORE_
86+
(#set! capture.final))
87+
5188
(type_identifier) @support.other.storage.type.c
5289

5390
; These types are all reserved words; if we see an identifier with this name,
@@ -133,27 +170,31 @@
133170

134171
; The "x" in `int x;`
135172
(declaration
136-
declarator: (identifier) @variable.declaration.c)
173+
declarator: (identifier) @variable.other.declaration.c)
137174

138175
; The "x" in `int x = y;`
139176
(init_declarator
140-
declarator: (identifier) @variable.declaration.c)
177+
declarator: (identifier) @variable.other.declaration.c)
141178

142179
; The "x" in `SomeType *x;`
143180
; (Should work no matter how many pointers deep we are.)
144181
(pointer_declarator
145-
declarator: [(identifier) (field_identifier)] @variable.declaration.pointer.c
182+
declarator: [(identifier) (field_identifier)] @variable.other.declaration.pointer.c
146183
(#is? test.descendantOfType "declaration field_declaration"))
147184

185+
; An array declarator: the "table" in `int table[4];`
186+
(array_declarator
187+
declarator: (identifier) @variable.other.declaration.c)
188+
148189
; A member of a struct.
149190
(field_declaration
150-
(field_identifier) @variable.declaration.member.c)
191+
(field_identifier) @variable.other.declaration.member.c)
151192

152193
; An attribute in a C99 struct designated initializer:
153194
; the "foo" in `MY_TYPE a = { .foo = true };`
154195
(initializer_pair
155196
(field_designator
156-
(field_identifier) @variable.declaration.member.c))
197+
(field_identifier) @variable.other.declaration.member.c))
157198

158199
; (and the associated ".")
159200
(initializer_pair
@@ -162,15 +203,15 @@
162203

163204
(field_declaration
164205
(pointer_declarator
165-
(field_identifier) @variable.declaration.member.c))
206+
(field_identifier) @variable.other.declaration.member.c))
166207

167208
(field_declaration
168209
(array_declarator
169-
(field_identifier) @variable.declaration.member.c))
210+
(field_identifier) @variable.other.declaration.member.c))
170211

171212
(init_declarator
172213
(pointer_declarator
173-
(identifier) @variable.declaration.member.c))
214+
(identifier) @variable.other.declaration.member.c))
174215

175216
; The "x" in `x = y;`
176217
(assignment_expression
@@ -253,8 +294,19 @@
253294
(false)
254295
] @constant.language._TYPE_.c
255296

256-
((identifier) @constant.c
257-
(#match? @constant.c "[_A-Z][_A-Z0-9]*$"))
297+
; Don't try to scope (e.g.) `int FOO = 1` as a constant when the user types `=`
298+
; but has not typed the value yet.
299+
(ERROR
300+
(identifier) @_IGNORE_
301+
(#set! capture.final))
302+
303+
; In most languages we wouldn't be making the assumption that an all-caps
304+
; identifier should be treated as a constant. But those languages don't have
305+
; macro preprocessors. The convention is decently strong in C/C++ that all-caps
306+
; identifiers will refer to `#define`d things.
307+
((identifier) @constant.other.c
308+
(#match? @constant.other.c "^[_A-Z][_A-Z0-9]*$")
309+
(#set! capture.shy))
258310

259311

260312
; COMMENTS

packages/language-c/grammars/tree-sitter-cpp/highlights.scm

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,55 @@
1313
"#define" @keyword.control.directive.define.cpp
1414
"#include" @keyword.control.directive.include.cpp
1515

16-
(["#if" "#ifdef" "#ifndef" "#endif" "#elif" "#else" "#define" "#include"] @punctuation.definition.directive.c
16+
(["#if" "#ifdef" "#ifndef" "#endif" "#elif" "#else" "#define" "#include"] @punctuation.definition.directive.cpp
1717
(#set! adjust.endAfterFirstMatchOf "^#"))
1818

19-
20-
; This will match if the more specific rules above haven't matched. The
21-
; anonymous nodes will match under ideal conditions, but might not be present
22-
; if the parser is flummoxed.
23-
((preproc_directive) @keyword.control.directive.c
19+
; `preproc_directive` will be used when the parser doesn't recognize the
20+
; directive as one of the above. It's permissive; `#afdfafsdfdfad` would be
21+
; parsed as a `preproc_directive`.
22+
;
23+
; Hence this rule will match if the more specific rules above haven't matched.
24+
; The anonymous nodes will match under ideal conditions, but might not be
25+
; present even when they ought to be _if_ the parser is flummoxed; so this'll
26+
; sometimes catch `#ifdef` and others.
27+
((preproc_directive) @keyword.control.directive.cpp
2428
(#set! capture.shy true))
2529

26-
((preproc_ifdef
27-
(identifier) @entity.name.function.preprocessor.c
28-
(#match? @entity.name.function.preprocessor.c "[a-zA-Z_$][\\w$]*")))
30+
((preproc_directive) @punctuation.definition.directive.cpp
31+
(#set! capture.shy true)
32+
(#set! adjust.endAfterFirstMatchOf "^#"))
2933

34+
; Macro functions are definitely entities.
3035
(preproc_function_def
31-
(identifier) @entity.name.function.preprocessor.c
36+
(identifier) @entity.name.function.preprocessor.cpp
3237
(#set! capture.final true))
3338

34-
(preproc_function_def
35-
(identifier) @entity.name.function.preprocessor.cpp
36-
(#set! capture.final true)
37-
)
39+
; Identifiers in macro definitions are definitely constants.
40+
((preproc_def
41+
name: (identifier) @constant.preprocessor.cpp))
3842

39-
(system_lib_string) @string.quoted.other.lt-gt.include.c
40-
((system_lib_string) @punctuation.definition.string.begin.c
43+
; We can also safely treat identifiers as constants in `#ifdef`…
44+
((preproc_ifdef
45+
(identifier) @constant.preprocessor.cpp))
46+
47+
; …and `#if` and `#elif`…
48+
(preproc_if
49+
(binary_expression
50+
(identifier) @constant.preprocessor.cpp))
51+
(preproc_elif
52+
(binary_expression
53+
(identifier) @constant.preprocessor.cpp))
54+
55+
; …and `#undef`.
56+
((preproc_call
57+
directive: (preproc_directive) @_IGNORE_
58+
argument: (preproc_arg) @constant.preprocessor.cpp)
59+
(#eq? @_IGNORE_ "#undef"))
60+
61+
(system_lib_string) @string.quoted.other.lt-gt.include.cpp
62+
((system_lib_string) @punctuation.definition.string.begin.cpp
4163
(#set! adjust.endAfterFirstMatchOf "^<"))
42-
((system_lib_string) @punctuation.definition.string.end.c
64+
((system_lib_string) @punctuation.definition.string.end.cpp
4365
(#set! adjust.startBeforeFirstMatchOf ">$"))
4466

4567

@@ -52,6 +74,13 @@
5274
(type_identifier) @_IGNORE_
5375
(#set! capture.final true))
5476

77+
; When the user has typed `#define FOO`, the macro injection thinks that `FOO`
78+
; is a type declaration (for some reason). This node structure seems to exist
79+
; only in that unusual and incorrect scenario, so we'll stop it from happening
80+
; so that it doesn't override the underlying `constant.other.cpp` scope.
81+
(translation_unit
82+
(type_identifier) @_IGNORE_
83+
(#set! capture.final))
5584

5685
(primitive_type) @support.type.builtin.cpp
5786

@@ -232,7 +261,7 @@
232261
; The "x" in `SomeType *x;`
233262
; (Should work no matter how many pointers deep we are.)
234263
(pointer_declarator
235-
declarator: [(identifier) (field_identifier)] @variable.declaration.pointer.c
264+
declarator: [(identifier) (field_identifier)] @variable.declaration.pointer.cpp
236265
(#is? test.descendantOfType "declaration field_declaration"))
237266

238267
; A member of a struct.
@@ -289,7 +318,7 @@
289318
; The "foo" in `const char *foo` within a parameter list.
290319
; (Should work no matter how many pointers deep we are.)
291320
(pointer_declarator
292-
declarator: [(identifier) (field_identifier)] @variable.parameter.pointer.c
321+
declarator: [(identifier) (field_identifier)] @variable.parameter.pointer.cpp
293322
(#is? test.descendantOfType "parameter_declaration"))
294323

295324
(parameter_declaration
@@ -332,8 +361,19 @@
332361
(false)
333362
] @constant.language._TYPE_.cpp
334363

335-
((identifier) @constant.cpp
336-
(#match? @constant.cpp "[_A-Z][_A-Z0-9]*$"))
364+
; Don't try to scope (e.g.) `int FOO = 1` as a constant when the user types `=`
365+
; but has not typed the value yet.
366+
(ERROR
367+
(identifier) @_IGNORE_
368+
(#set! capture.final))
369+
370+
; In most languages we wouldn't be making the assumption that an all-caps
371+
; identifier should be treated as a constant. But those languages don't have
372+
; macro preprocessors. The convention is decently strong in C/C++ that all-caps
373+
; identifiers refer to `#define`d things. (Anchored so `fooBAR` won't match.)
374+
((identifier) @constant.other.cpp
375+
(#match? @constant.other.cpp "^[_A-Z][_A-Z0-9]*$")
376+
(#set! capture.shy))
337377

338378

339379
; COMMENTS

packages/language-html/grammars/tree-sitter-html/folds.scm

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,74 @@
1+
; When dealing with a self-closing element that spans multiple lines, this lets
2+
; us fold the attribute list.
3+
;
4+
; This query captures elements that happen to be self-closing but don't end
5+
; with an XHTML-style ` />`. Because `tree-sitter-html` doesn't distinguish
6+
; these from elements that can have content, we have to check the tag name to
7+
; know how to treat these.
8+
9+
((element
10+
(start_tag
11+
(tag_name) @_IGNORE_) @fold)
12+
(#match? @_IGNORE_ "^(area|base|br|col|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$")
13+
)
14+
15+
; This one captures the XHTML-style nodes.
16+
(self_closing_tag) @fold
17+
18+
19+
; TODO: Right now, the fold cache doesn't work properly when a given range
20+
; satisfies more than one fold. We should employ `ScopeResolver` to fix this.
21+
22+
; Fold up all of
23+
;
24+
; <div
25+
; foo="bar"
26+
; baz="thud">
27+
;
28+
; </div>
29+
;
30+
; with the fold indicator appearing on whichever line has the `>` that closes
31+
; the opening tag.
32+
;
33+
; Usually this'll be the same line on which the tag opened; but when it isn't,
34+
; this allows for the attribute list of the opening element to be folded
35+
; separately from the element's contents.
36+
;
37+
38+
(element
39+
(start_tag
40+
(tag_name) @_IGNORE_
41+
">" @fold)
42+
(#not-match? @_IGNORE_ "^(area|base|br|col|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$")
43+
(#set! fold.endAt parent.parent.lastNamedChild.startPosition)
44+
(#set! fold.adjustToEndOfPreviousRow true)
45+
)
46+
47+
48+
; When we have…
49+
;
50+
; <div
51+
; foo="bar"
52+
; baz="thud"
53+
; >
54+
;
55+
; </div>
56+
;
57+
; …we can put a fold indicator on the line with `<div` and use it to fold up
58+
; all of a start tag's attributes.
59+
;
60+
; We keep the end of the fold on a separate line because otherwise we lose the
61+
; ability to independently toggle the folding of the element's contents.
62+
;
63+
(element
64+
(start_tag
65+
(tag_name) @_IGNORE_) @fold
66+
(#not-match? @_IGNORE_ "^(area|base|br|col|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)$")
67+
(#set! fold.endAt lastChild.startPosition)
68+
(#set! fold.adjustToEndOfPreviousRow true))
69+
170

271
[
3-
(element)
472
(script_element)
573
(style_element)
674
] @fold

packages/language-html/grammars/tree-sitter-html/indents.scm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
((start_tag) @indent
33
; Only indent if this isn't a self-closing tag.
4-
(#not-match? @indent "^<(?:area|base|br|col|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)\\s"))
4+
(#not-match? @indent "^<(?:area|base|br|col|embed|hr|img|input|keygen|link|meta|param|source|track|wbr)(?=\\s|>)"))
55

66
; `end_tag` will still match when only `</div` is present. Without enforcing
77
; the presence of `>`, the dedent happens too soon.

packages/language-php/grammars/modern-tree-sitter-phpdoc.cson

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ parser: 'tree-sitter-phpdoc'
66
injectionRegex: '^(phpdoc|PHPDoc)$'
77

88
treeSitter:
9-
parserSource: 'github:claytonrcarter/tree-sitter-phpdoc#915a527d5aafa81b31acf67fab31b0ac6b6319c0'
9+
parserSource: 'github:claytonrcarter/tree-sitter-phpdoc#f285e338d328a03920a9bfd8dda78585c7ddcca3'
1010
grammar: 'tree-sitter/tree-sitter-phpdoc.wasm'
1111
highlightsQuery: 'tree-sitter/queries/phpdoc/highlights.scm'

0 commit comments

Comments
 (0)