Skip to content

Commit 2892f0f

Browse files
authored
Merge pull request github#17873 from github/tausbn/python-fix-generator-expression-locations
Python: Even more parser fixes
2 parents 03ffaac + 5d6600e commit 2892f0f

File tree

11 files changed

+31902
-31207
lines changed

11 files changed

+31902
-31207
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
async def foo():
2+
await bar() + await baz()
3+
4+
async with foo() as bar, baz() as quux:
5+
pass
6+
7+
async for spam in eggs:
8+
pass

python/extractor/tests/parser/collections.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,5 @@
3535
x, y,
3636
#comment
3737
)
38+
39+
((z,))

python/extractor/tests/parser/comprehensions.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,10 @@
6565
d for e in f if g # comment
6666
# comment
6767
] # comment
68+
69+
# Generator expression with comments
70+
(# comment
71+
alpha # comment
72+
for beta in gamma # comment
73+
# comment
74+
)

python/extractor/tests/parser/strings.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,12 @@
7777
b'\xc5\xe5'
7878
if 35:
7979
f"{x=}"
80+
if 36:
81+
r"a\"a"
82+
if 37:
83+
r'a\'a'
84+
if 38:
85+
r'a\\'
86+
if 39:
87+
r'a\
88+
'

python/extractor/tsg-python/python.tsg

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@
404404

405405
;;; GeneratorExp
406406

407-
(generator_expression . "(" . (comment)* . (_) @start (_) @end . (comment)* . ")" .) @generatorexp
407+
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @generatorexp
408408
{
409409
attr (@generatorexp.node) _location_start = (location-start @start)
410410
attr (@generatorexp.node) _location_end = (location-end @end)
@@ -416,13 +416,13 @@
416416
attr (@if.node) _location_end = (location-end @expr)
417417
}
418418

419-
(generator_expression . "(" . (comment)* . (_) @start (for_in_clause) @child (_) @end . (comment)* . ")" .) @genexpr
419+
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @child [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
420420
{
421421
attr (@child.node) _location_start = (location-start @start)
422422
attr (@child.node) _location_end = (location-end @end)
423423
}
424424

425-
(generator_expression . "(" . (comment)* . (_) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
425+
(generator_expression . "(" . (comment)* . (expression) @start (for_in_clause) @end . (comment)* . ")" .) @genexpr
426426
{
427427
attr (@end.node) _location_start = (location-start @start)
428428
attr (@end.node) _location_end = (location-end @end)
@@ -863,7 +863,7 @@
863863
; information for the entire generator expression (yes, it is a wide parameter!) and so we must recreate the logic for
864864
; setting this location information correctly.
865865

866-
(generator_expression . "(" . (comment)* . (_) @start (_) @end . (comment)* . ")" .) @genexpr
866+
(generator_expression . "(" . (comment)* . (expression) @start [(for_in_clause) (if_clause)] @end . (comment)* . ")" .) @genexpr
867867
{
868868
; Synthesize the `genexpr` function
869869
let @genexpr.fun = (ast-node @genexpr "Function")
@@ -2661,6 +2661,14 @@
26612661
let @with.first = @first.node
26622662
}
26632663

2664+
; Async status
2665+
; NOTE: We only set the `is_async` field on the _first_ clause of the `with` statement,
2666+
; as this is the behaviour of the old parser.
2667+
(with_statement "async" "with" @with_keyword (with_clause . (with_item) @with))
2668+
{
2669+
attr (@with.node) is_async = #true
2670+
}
2671+
26642672
(with_item
26652673
value: (_) @value
26662674
) @with
@@ -3264,6 +3272,16 @@
32643272
}
32653273
}
32663274

3275+
; Async status
3276+
(function_definition "async" "def" @def_keyword) @funcdef
3277+
{
3278+
let start = (location-start @def_keyword)
3279+
attr (@funcdef.function) is_async = #true
3280+
attr (@funcdef.node) _location_start = start
3281+
attr (@funcdef.function) _location_start = start
3282+
attr (@funcdef.funcexpr) _location_start = start
3283+
}
3284+
32673285
;;; Decorators
32683286

32693287
(decorated_definition
@@ -3478,5 +3496,9 @@
34783496

34793497
[(tuple element: (_)) (tuple_pattern)] @tup
34803498
{
3481-
attr (@tup.node) parenthesised = #true
3499+
; In order to avoid writing to the `parenthesised` attribute twice, we only set it here
3500+
; if the surrounding expression is not a `parenthesized_expression`.
3501+
if (not (instance-of (get-parent @tup) "parenthesized_expression")) {
3502+
attr (@tup.node) parenthesised = #true
3503+
}
34823504
}

python/extractor/tsg-python/tsp/grammar.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -751,14 +751,14 @@ module.exports = grammar({
751751
$.comparison_operator,
752752
$.not_operator,
753753
$.boolean_operator,
754-
$.await,
755754
$.lambda,
756755
$.primary_expression,
757756
$.conditional_expression,
758757
$.named_expression
759758
),
760759

761760
primary_expression: $ => choice(
761+
$.await,
762762
$.binary_operator,
763763
$.identifier,
764764
$.keyword_identifier,
@@ -1202,7 +1202,7 @@ module.exports = grammar({
12021202

12031203
await: $ => prec(PREC.unary, seq(
12041204
'await',
1205-
$.expression
1205+
$.primary_expression
12061206
)),
12071207

12081208
comment: $ => token(seq('#', /.*/)),

python/extractor/tsg-python/tsp/src/grammar.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
{
2-
"$schema": "https://tree-sitter.github.io/tree-sitter/assets/schemas/grammar.schema.json",
32
"name": "python",
43
"word": "identifier",
54
"rules": {
@@ -3843,10 +3842,6 @@
38433842
"type": "SYMBOL",
38443843
"name": "boolean_operator"
38453844
},
3846-
{
3847-
"type": "SYMBOL",
3848-
"name": "await"
3849-
},
38503845
{
38513846
"type": "SYMBOL",
38523847
"name": "lambda"
@@ -3868,6 +3863,10 @@
38683863
"primary_expression": {
38693864
"type": "CHOICE",
38703865
"members": [
3866+
{
3867+
"type": "SYMBOL",
3868+
"name": "await"
3869+
},
38713870
{
38723871
"type": "SYMBOL",
38733872
"name": "binary_operator"
@@ -6586,7 +6585,7 @@
65866585
},
65876586
{
65886587
"type": "SYMBOL",
6589-
"name": "expression"
6588+
"name": "primary_expression"
65906589
}
65916590
]
65926591
}
@@ -6696,3 +6695,4 @@
66966695
"parameter"
66976696
]
66986697
}
6698+

python/extractor/tsg-python/tsp/src/node-types.json

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,6 @@
115115
"type": "expression",
116116
"named": true,
117117
"subtypes": [
118-
{
119-
"type": "await",
120-
"named": true
121-
},
122118
{
123119
"type": "boolean_operator",
124120
"named": true
@@ -229,6 +225,10 @@
229225
"type": "attribute",
230226
"named": true
231227
},
228+
{
229+
"type": "await",
230+
"named": true
231+
},
232232
{
233233
"type": "binary_operator",
234234
"named": true
@@ -587,7 +587,7 @@
587587
"required": true,
588588
"types": [
589589
{
590-
"type": "expression",
590+
"type": "primary_expression",
591591
"named": true
592592
}
593593
]
@@ -2691,7 +2691,6 @@
26912691
{
26922692
"type": "module",
26932693
"named": true,
2694-
"root": true,
26952694
"fields": {},
26962695
"children": {
26972696
"multiple": true,
@@ -3816,10 +3815,6 @@
38163815
"type": ":=",
38173816
"named": false
38183817
},
3819-
{
3820-
"type": ";",
3821-
"named": false
3822-
},
38233818
{
38243819
"type": "<",
38253820
"named": false
@@ -3876,10 +3871,6 @@
38763871
"type": "[",
38773872
"named": false
38783873
},
3879-
{
3880-
"type": "\\",
3881-
"named": false
3882-
},
38833874
{
38843875
"type": "]",
38853876
"named": false

0 commit comments

Comments
 (0)