Skip to content

Commit 5db601a

Browse files
committed
Python: Allow comments in comprehensions
A somewhat complicated solution that necessitated adding a new custom function to `tsg-python`. See the comments in `python.tsg` for why this was necessary.
1 parent 4f60494 commit 5db601a

File tree

3 files changed

+60
-51
lines changed

3 files changed

+60
-51
lines changed

python/extractor/tests/parser/comprehensions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,13 @@
5555
t = tuple(x for y in z)
5656

5757
[( t, ) for v in w]
58+
59+
[# comment
60+
a for b in c # comment
61+
# comment
62+
] # comment
63+
64+
[# comment
65+
d for e in f if g # comment
66+
# comment
67+
] # comment

python/extractor/tsg-python/python.tsg

Lines changed: 28 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,30 +1062,38 @@
10621062
let @genexpr.result = tuple
10631063
}
10641064

1065-
; For the final `if` clause, we need to hook it up with the `yield` expression and with its associated `for` clause.
1065+
; For the final clause, we need to hook it up with the rest of the expression.
1066+
; If it's an `if` clause, we need to hook it up with the `yield` expression and with its associated
1067+
; `for` clause.
1068+
; If it's a `for` clause, we only need to create the `yield` expression and hook the clause up with it.
1069+
;
1070+
; It would be tempting to use anchors here, but they just don't work. In particular, an anchor of
1071+
; the form `. (comment)* . )` (which would be needed in order to handle the case where there are
1072+
; comments after the last clause) causes the `tree-sitter` query engine to match _all_ clauses, not
1073+
; just the last one.
1074+
; Instead, we gather up all clauses in a list (these will be in the order they appear in the source
1075+
; code), and extract the last element using a custom Rust function.
10661076
[
10671077
(generator_expression
10681078
body: (_) @body
1069-
(if_clause) @last
1070-
.
1079+
[(if_clause) (for_in_clause)]+ @last_candidates
10711080
) @genexpr
10721081
(list_comprehension
10731082
body: (_) @body
1074-
(if_clause) @last
1075-
.
1083+
[(if_clause) (for_in_clause)]+ @last_candidates
10761084
) @genexpr
10771085
(set_comprehension
10781086
body: (_) @body
1079-
(if_clause) @last
1080-
.
1087+
[(if_clause) (for_in_clause)]+ @last_candidates
10811088
) @genexpr
10821089
(dictionary_comprehension
10831090
body: (_) @body
1084-
(if_clause) @last
1085-
.
1091+
[(if_clause) (for_in_clause)]+ @last_candidates
10861092
) @genexpr
10871093
]
10881094
{
1095+
let last = (get-last-element @last_candidates)
1096+
10891097
let expr = (ast-node @body "Expr")
10901098
let yield = (ast-node @body "Yield")
10911099

@@ -1096,50 +1104,19 @@
10961104

10971105
attr (yield) value = @genexpr.result
10981106
attr (@body.node) ctx = "load"
1099-
edge @last.first_if -> expr
1100-
attr (@last.first_if -> expr) body = 0
1101-
1102-
; Hook up this `if` clause with its `for` clause
1103-
edge @last.for -> @last.node
1104-
attr (@last.for -> @last.node) body = 0
1105-
}
1106-
1107-
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
1108-
[
1109-
(generator_expression
1110-
body: (_) @body
1111-
(for_in_clause) @last
1112-
.
1113-
) @genexpr
1114-
(list_comprehension
1115-
body: (_) @body
1116-
(for_in_clause) @last
1117-
.
1118-
) @genexpr
1119-
(set_comprehension
1120-
body: (_) @body
1121-
(for_in_clause) @last
1122-
.
1123-
) @genexpr
1124-
(dictionary_comprehension
1125-
body: (_) @body
1126-
(for_in_clause) @last
1127-
.
1128-
) @genexpr
1129-
]
1130-
{
1131-
let expr = (ast-node @body "Expr")
1132-
let yield = (ast-node @body "Yield")
11331107

1134-
let @genexpr.expr = expr
1135-
let @genexpr.yield = yield
1136-
1137-
attr (expr) value = yield
1108+
if (instance-of last "if_clause") {
1109+
edge last.first_if -> expr
1110+
attr (last.first_if -> expr) body = 0
11381111

1139-
attr (yield) value = @genexpr.result
1140-
attr (@body.node) ctx = "load"
1141-
edge @last.node -> expr
1142-
attr (@last.node -> expr) body = 0
1112+
; Hook up this `if` clause with its `for` clause
1113+
edge last.for -> last.node
1114+
attr (last.for -> last.node) body = 0
1115+
} else {
1116+
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
1117+
edge last.node -> expr
1118+
attr (last.node -> expr) body = 0
1119+
}
11431120
}
11441121

11451122
; For whatever reason, we do not consider parentheses around the yielded expression if they are present, so

python/extractor/tsg-python/src/main.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,22 @@ pub mod extra_functions {
463463
Ok(Value::Integer(left % right))
464464
}
465465
}
466+
467+
pub struct GetLastElement;
468+
469+
impl Function for GetLastElement {
470+
fn call(
471+
&self,
472+
_graph: &mut Graph,
473+
_source: &str,
474+
parameters: &mut dyn Parameters,
475+
) -> Result<Value, ExecutionError> {
476+
let list = parameters.param()?.into_list()?;
477+
parameters.finish()?;
478+
let last = list.last().unwrap_or(&Value::Null).clone();
479+
Ok(last)
480+
}
481+
}
466482
}
467483

468484
fn main() -> Result<()> {
@@ -562,6 +578,12 @@ fn main() -> Result<()> {
562578
);
563579

564580
functions.add(Identifier::from("mod"), extra_functions::Modulo);
581+
582+
functions.add(
583+
Identifier::from("get-last-element"),
584+
extra_functions::GetLastElement,
585+
);
586+
565587
let globals = Variables::new();
566588
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
567589
let graph = file

0 commit comments

Comments
 (0)