Skip to content

Commit b2666e3

Browse files
committed
non-recursively implement remap_variables
1 parent 935af10 commit b2666e3

File tree

9 files changed

+148
-129
lines changed

9 files changed

+148
-129
lines changed

bigframes/core/rewrite/identifiers.py

Lines changed: 43 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -30,28 +30,47 @@ def remap_variables(
3030
3131
Note: this will convert a DAG to a tree.
3232
"""
33-
child_replacement_map = dict()
34-
ref_mapping = dict()
33+
34+
def _remap_node(
35+
node: nodes.BigFrameNode,
36+
child_results: typing.Tuple[
37+
typing.Tuple[
38+
nodes.BigFrameNode,
39+
dict[identifiers.ColumnId, identifiers.ColumnId],
40+
],
41+
...,
42+
],
43+
) -> typing.Tuple[
44+
nodes.BigFrameNode,
45+
dict[identifiers.ColumnId, identifiers.ColumnId],
46+
]:
47+
child_replacement_map = {
48+
original_child: result[0]
49+
for original_child, result in zip(node.child_nodes, child_results)
50+
}
51+
ref_mapping = {}
52+
for _, child_var_mapping in child_results:
53+
ref_mapping.update(child_var_mapping)
54+
55+
# This is actually invalid until we've replaced all of children, refs and var defs
56+
with_new_children = node.transform_children(
57+
lambda child_node: child_replacement_map[child_node]
58+
)
59+
60+
with_new_refs = with_new_children.remap_refs(ref_mapping)
61+
62+
node_var_mapping = {
63+
old_id: next(id_generator) for old_id in node.node_defined_ids
64+
}
65+
with_new_vars = with_new_refs.remap_vars(node_var_mapping)
66+
with_new_vars._validate()
67+
68+
return (
69+
with_new_vars,
70+
node_var_mapping
71+
if node.defines_namespace
72+
else (ref_mapping | node_var_mapping),
73+
)
74+
3575
# Sequential ids are assigned bottom-up left-to-right
36-
for child in root.child_nodes:
37-
new_child, child_var_mapping = remap_variables(child, id_generator=id_generator)
38-
child_replacement_map[child] = new_child
39-
ref_mapping.update(child_var_mapping)
40-
41-
# This is actually invalid until we've replaced all of children, refs and var defs
42-
with_new_children = root.transform_children(
43-
lambda node: child_replacement_map[node]
44-
)
45-
46-
with_new_refs = with_new_children.remap_refs(ref_mapping)
47-
48-
node_var_mapping = {old_id: next(id_generator) for old_id in root.node_defined_ids}
49-
with_new_vars = with_new_refs.remap_vars(node_var_mapping)
50-
with_new_vars._validate()
51-
52-
return (
53-
with_new_vars,
54-
node_var_mapping
55-
if root.defines_namespace
56-
else (ref_mapping | node_var_mapping),
57-
)
76+
return root.reduce_up(_remap_node)
Lines changed: 47 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,78 +1,78 @@
1-
WITH `bfcte_1` AS (
1+
WITH `bfcte_0` AS (
22
SELECT
33
*
44
FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` INT64, `bfcol_2` INT64, `bfcol_3` STRING, `bfcol_4` INT64>>[STRUCT(0, 123456789, 0, 'Hello, World!', 0), STRUCT(1, -987654321, 1, 'こんにちは', 1), STRUCT(2, 314159, 2, ' ¡Hola Mundo! ', 2), STRUCT(3, CAST(NULL AS INT64), 3, CAST(NULL AS STRING), 3), STRUCT(4, -234892, 4, 'Hello, World!', 4), STRUCT(5, 55555, 5, 'Güten Tag!', 5), STRUCT(6, 101202303, 6, 'capitalize, This ', 6), STRUCT(7, -214748367, 7, ' سلام', 7), STRUCT(8, 2, 8, 'T', 8)])
5-
), `bfcte_3` AS (
5+
), `bfcte_2` AS (
66
SELECT
77
*,
8-
`bfcol_4` AS `bfcol_10`
9-
FROM `bfcte_1`
10-
), `bfcte_5` AS (
8+
`bfcol_4` AS `bfcol_11`
9+
FROM `bfcte_0`
10+
), `bfcte_4` AS (
1111
SELECT
1212
*,
13-
0 AS `bfcol_16`
14-
FROM `bfcte_3`
13+
0 AS `bfcol_23`
14+
FROM `bfcte_2`
1515
), `bfcte_6` AS (
1616
SELECT
17-
`bfcol_0` AS `bfcol_17`,
18-
`bfcol_2` AS `bfcol_18`,
19-
`bfcol_1` AS `bfcol_19`,
20-
`bfcol_3` AS `bfcol_20`,
21-
`bfcol_16` AS `bfcol_21`,
22-
`bfcol_10` AS `bfcol_22`
23-
FROM `bfcte_5`
24-
), `bfcte_0` AS (
17+
`bfcol_0` AS `bfcol_30`,
18+
`bfcol_2` AS `bfcol_31`,
19+
`bfcol_1` AS `bfcol_32`,
20+
`bfcol_3` AS `bfcol_33`,
21+
`bfcol_23` AS `bfcol_34`,
22+
`bfcol_11` AS `bfcol_35`
23+
FROM `bfcte_4`
24+
), `bfcte_1` AS (
2525
SELECT
2626
*
27-
FROM UNNEST(ARRAY<STRUCT<`bfcol_23` INT64, `bfcol_24` INT64, `bfcol_25` INT64, `bfcol_26` STRING, `bfcol_27` INT64>>[STRUCT(0, 123456789, 0, 'Hello, World!', 0), STRUCT(1, -987654321, 1, 'こんにちは', 1), STRUCT(2, 314159, 2, ' ¡Hola Mundo! ', 2), STRUCT(3, CAST(NULL AS INT64), 3, CAST(NULL AS STRING), 3), STRUCT(4, -234892, 4, 'Hello, World!', 4), STRUCT(5, 55555, 5, 'Güten Tag!', 5), STRUCT(6, 101202303, 6, 'capitalize, This ', 6), STRUCT(7, -214748367, 7, ' سلام', 7), STRUCT(8, 2, 8, 'T', 8)])
28-
), `bfcte_2` AS (
27+
FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` INT64, `bfcol_2` INT64, `bfcol_3` STRING, `bfcol_4` INT64>>[STRUCT(0, 123456789, 0, 'Hello, World!', 0), STRUCT(1, -987654321, 1, 'こんにちは', 1), STRUCT(2, 314159, 2, ' ¡Hola Mundo! ', 2), STRUCT(3, CAST(NULL AS INT64), 3, CAST(NULL AS STRING), 3), STRUCT(4, -234892, 4, 'Hello, World!', 4), STRUCT(5, 55555, 5, 'Güten Tag!', 5), STRUCT(6, 101202303, 6, 'capitalize, This ', 6), STRUCT(7, -214748367, 7, ' سلام', 7), STRUCT(8, 2, 8, 'T', 8)])
28+
), `bfcte_3` AS (
2929
SELECT
3030
*,
31-
`bfcol_27` AS `bfcol_33`
32-
FROM `bfcte_0`
33-
), `bfcte_4` AS (
31+
`bfcol_4` AS `bfcol_10`
32+
FROM `bfcte_1`
33+
), `bfcte_5` AS (
3434
SELECT
3535
*,
36-
1 AS `bfcol_39`
37-
FROM `bfcte_2`
36+
1 AS `bfcol_22`
37+
FROM `bfcte_3`
3838
), `bfcte_7` AS (
3939
SELECT
40-
`bfcol_23` AS `bfcol_40`,
41-
`bfcol_25` AS `bfcol_41`,
42-
`bfcol_24` AS `bfcol_42`,
43-
`bfcol_26` AS `bfcol_43`,
44-
`bfcol_39` AS `bfcol_44`,
45-
`bfcol_33` AS `bfcol_45`
46-
FROM `bfcte_4`
40+
`bfcol_0` AS `bfcol_24`,
41+
`bfcol_2` AS `bfcol_25`,
42+
`bfcol_1` AS `bfcol_26`,
43+
`bfcol_3` AS `bfcol_27`,
44+
`bfcol_22` AS `bfcol_28`,
45+
`bfcol_10` AS `bfcol_29`
46+
FROM `bfcte_5`
4747
), `bfcte_8` AS (
4848
SELECT
4949
*
5050
FROM (
5151
SELECT
52-
`bfcol_17` AS `bfcol_46`,
53-
`bfcol_18` AS `bfcol_47`,
54-
`bfcol_19` AS `bfcol_48`,
55-
`bfcol_20` AS `bfcol_49`,
56-
`bfcol_21` AS `bfcol_50`,
57-
`bfcol_22` AS `bfcol_51`
52+
`bfcol_30` AS `bfcol_36`,
53+
`bfcol_31` AS `bfcol_37`,
54+
`bfcol_32` AS `bfcol_38`,
55+
`bfcol_33` AS `bfcol_39`,
56+
`bfcol_34` AS `bfcol_40`,
57+
`bfcol_35` AS `bfcol_41`
5858
FROM `bfcte_6`
5959
UNION ALL
6060
SELECT
61-
`bfcol_40` AS `bfcol_46`,
62-
`bfcol_41` AS `bfcol_47`,
63-
`bfcol_42` AS `bfcol_48`,
64-
`bfcol_43` AS `bfcol_49`,
65-
`bfcol_44` AS `bfcol_50`,
66-
`bfcol_45` AS `bfcol_51`
61+
`bfcol_24` AS `bfcol_36`,
62+
`bfcol_25` AS `bfcol_37`,
63+
`bfcol_26` AS `bfcol_38`,
64+
`bfcol_27` AS `bfcol_39`,
65+
`bfcol_28` AS `bfcol_40`,
66+
`bfcol_29` AS `bfcol_41`
6767
FROM `bfcte_7`
6868
)
6969
)
7070
SELECT
71-
`bfcol_46` AS `rowindex`,
72-
`bfcol_47` AS `rowindex_1`,
73-
`bfcol_48` AS `int64_col`,
74-
`bfcol_49` AS `string_col`
71+
`bfcol_36` AS `rowindex`,
72+
`bfcol_37` AS `rowindex_1`,
73+
`bfcol_38` AS `int64_col`,
74+
`bfcol_39` AS `string_col`
7575
FROM `bfcte_8`
7676
ORDER BY
77-
`bfcol_50` ASC NULLS LAST,
78-
`bfcol_51` ASC NULLS LAST
77+
`bfcol_40` ASC NULLS LAST,
78+
`bfcol_41` ASC NULLS LAST
Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,32 @@
11
WITH `bfcte_1` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`,
4-
`rowindex` AS `bfcol_1`
3+
`int64_col` AS `bfcol_2`,
4+
`rowindex` AS `bfcol_3`
55
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
66
), `bfcte_2` AS (
77
SELECT
8-
`bfcol_1` AS `bfcol_2`,
9-
`bfcol_0` AS `bfcol_3`
8+
`bfcol_3` AS `bfcol_6`,
9+
`bfcol_2` AS `bfcol_7`
1010
FROM `bfcte_1`
1111
), `bfcte_0` AS (
1212
SELECT
13-
`int64_col` AS `bfcol_4`,
14-
`int64_too` AS `bfcol_5`
13+
`int64_col` AS `bfcol_0`,
14+
`int64_too` AS `bfcol_1`
1515
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1616
), `bfcte_3` AS (
1717
SELECT
18-
`bfcol_4` AS `bfcol_6`,
19-
`bfcol_5` AS `bfcol_7`
18+
`bfcol_0` AS `bfcol_4`,
19+
`bfcol_1` AS `bfcol_5`
2020
FROM `bfcte_0`
2121
), `bfcte_4` AS (
2222
SELECT
2323
*
2424
FROM `bfcte_2`
2525
LEFT JOIN `bfcte_3`
26-
ON COALESCE(`bfcol_2`, 0) = COALESCE(`bfcol_6`, 0)
27-
AND COALESCE(`bfcol_2`, 1) = COALESCE(`bfcol_6`, 1)
26+
ON COALESCE(`bfcol_6`, 0) = COALESCE(`bfcol_4`, 0)
27+
AND COALESCE(`bfcol_6`, 1) = COALESCE(`bfcol_4`, 1)
2828
)
2929
SELECT
30-
`bfcol_3` AS `int64_col`,
31-
`bfcol_7` AS `int64_too`
30+
`bfcol_7` AS `int64_col`,
31+
`bfcol_5` AS `int64_too`
3232
FROM `bfcte_4`

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/bool_col/out.sql

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,24 +10,24 @@ WITH `bfcte_1` AS (
1010
FROM `bfcte_1`
1111
), `bfcte_0` AS (
1212
SELECT
13-
`bool_col` AS `bfcol_4`,
14-
`rowindex` AS `bfcol_5`
13+
`bool_col` AS `bfcol_0`,
14+
`rowindex` AS `bfcol_1`
1515
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1616
), `bfcte_3` AS (
1717
SELECT
18-
`bfcol_5` AS `bfcol_6`,
19-
`bfcol_4` AS `bfcol_7`
18+
`bfcol_1` AS `bfcol_4`,
19+
`bfcol_0` AS `bfcol_5`
2020
FROM `bfcte_0`
2121
), `bfcte_4` AS (
2222
SELECT
2323
*
2424
FROM `bfcte_2`
2525
INNER JOIN `bfcte_3`
26-
ON COALESCE(CAST(`bfcol_3` AS STRING), '0') = COALESCE(CAST(`bfcol_7` AS STRING), '0')
27-
AND COALESCE(CAST(`bfcol_3` AS STRING), '1') = COALESCE(CAST(`bfcol_7` AS STRING), '1')
26+
ON COALESCE(CAST(`bfcol_3` AS STRING), '0') = COALESCE(CAST(`bfcol_5` AS STRING), '0')
27+
AND COALESCE(CAST(`bfcol_3` AS STRING), '1') = COALESCE(CAST(`bfcol_5` AS STRING), '1')
2828
)
2929
SELECT
3030
`bfcol_2` AS `rowindex_x`,
3131
`bfcol_3` AS `bool_col`,
32-
`bfcol_6` AS `rowindex_y`
32+
`bfcol_4` AS `rowindex_y`
3333
FROM `bfcte_4`

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/float64_col/out.sql

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,24 +10,24 @@ WITH `bfcte_1` AS (
1010
FROM `bfcte_1`
1111
), `bfcte_0` AS (
1212
SELECT
13-
`float64_col` AS `bfcol_4`,
14-
`rowindex` AS `bfcol_5`
13+
`float64_col` AS `bfcol_0`,
14+
`rowindex` AS `bfcol_1`
1515
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1616
), `bfcte_3` AS (
1717
SELECT
18-
`bfcol_5` AS `bfcol_6`,
19-
`bfcol_4` AS `bfcol_7`
18+
`bfcol_1` AS `bfcol_4`,
19+
`bfcol_0` AS `bfcol_5`
2020
FROM `bfcte_0`
2121
), `bfcte_4` AS (
2222
SELECT
2323
*
2424
FROM `bfcte_2`
2525
INNER JOIN `bfcte_3`
26-
ON IF(IS_NAN(`bfcol_3`), 2, COALESCE(`bfcol_3`, 0)) = IF(IS_NAN(`bfcol_7`), 2, COALESCE(`bfcol_7`, 0))
27-
AND IF(IS_NAN(`bfcol_3`), 3, COALESCE(`bfcol_3`, 1)) = IF(IS_NAN(`bfcol_7`), 3, COALESCE(`bfcol_7`, 1))
26+
ON IF(IS_NAN(`bfcol_3`), 2, COALESCE(`bfcol_3`, 0)) = IF(IS_NAN(`bfcol_5`), 2, COALESCE(`bfcol_5`, 0))
27+
AND IF(IS_NAN(`bfcol_3`), 3, COALESCE(`bfcol_3`, 1)) = IF(IS_NAN(`bfcol_5`), 3, COALESCE(`bfcol_5`, 1))
2828
)
2929
SELECT
3030
`bfcol_2` AS `rowindex_x`,
3131
`bfcol_3` AS `float64_col`,
32-
`bfcol_6` AS `rowindex_y`
32+
`bfcol_4` AS `rowindex_y`
3333
FROM `bfcte_4`

tests/unit/core/compile/sqlglot/snapshots/test_compile_join/test_compile_join_w_on/int64_col/out.sql

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,24 +10,24 @@ WITH `bfcte_1` AS (
1010
FROM `bfcte_1`
1111
), `bfcte_0` AS (
1212
SELECT
13-
`int64_col` AS `bfcol_4`,
14-
`rowindex` AS `bfcol_5`
13+
`int64_col` AS `bfcol_0`,
14+
`rowindex` AS `bfcol_1`
1515
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1616
), `bfcte_3` AS (
1717
SELECT
18-
`bfcol_5` AS `bfcol_6`,
19-
`bfcol_4` AS `bfcol_7`
18+
`bfcol_1` AS `bfcol_4`,
19+
`bfcol_0` AS `bfcol_5`
2020
FROM `bfcte_0`
2121
), `bfcte_4` AS (
2222
SELECT
2323
*
2424
FROM `bfcte_2`
2525
INNER JOIN `bfcte_3`
26-
ON COALESCE(`bfcol_3`, 0) = COALESCE(`bfcol_7`, 0)
27-
AND COALESCE(`bfcol_3`, 1) = COALESCE(`bfcol_7`, 1)
26+
ON COALESCE(`bfcol_3`, 0) = COALESCE(`bfcol_5`, 0)
27+
AND COALESCE(`bfcol_3`, 1) = COALESCE(`bfcol_5`, 1)
2828
)
2929
SELECT
3030
`bfcol_2` AS `rowindex_x`,
3131
`bfcol_3` AS `int64_col`,
32-
`bfcol_6` AS `rowindex_y`
32+
`bfcol_4` AS `rowindex_y`
3333
FROM `bfcte_4`
Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
WITH `bfcte_1` AS (
1+
WITH `bfcte_0` AS (
22
SELECT
33
`numeric_col` AS `bfcol_0`,
44
`rowindex` AS `bfcol_1`
@@ -7,27 +7,27 @@ WITH `bfcte_1` AS (
77
SELECT
88
`bfcol_1` AS `bfcol_2`,
99
`bfcol_0` AS `bfcol_3`
10-
FROM `bfcte_1`
11-
), `bfcte_0` AS (
10+
FROM `bfcte_0`
11+
), `bfcte_1` AS (
1212
SELECT
13-
`numeric_col` AS `bfcol_4`,
14-
`rowindex` AS `bfcol_5`
13+
`numeric_col` AS `bfcol_0`,
14+
`rowindex` AS `bfcol_1`
1515
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1616
), `bfcte_3` AS (
1717
SELECT
18-
`bfcol_5` AS `bfcol_6`,
19-
`bfcol_4` AS `bfcol_7`
20-
FROM `bfcte_0`
18+
`bfcol_1` AS `bfcol_4`,
19+
`bfcol_0` AS `bfcol_5`
20+
FROM `bfcte_1`
2121
), `bfcte_4` AS (
2222
SELECT
2323
*
2424
FROM `bfcte_2`
2525
INNER JOIN `bfcte_3`
26-
ON COALESCE(`bfcol_3`, CAST(0 AS NUMERIC)) = COALESCE(`bfcol_7`, CAST(0 AS NUMERIC))
27-
AND COALESCE(`bfcol_3`, CAST(1 AS NUMERIC)) = COALESCE(`bfcol_7`, CAST(1 AS NUMERIC))
26+
ON COALESCE(`bfcol_3`, CAST(0 AS NUMERIC)) = COALESCE(`bfcol_5`, CAST(0 AS NUMERIC))
27+
AND COALESCE(`bfcol_3`, CAST(1 AS NUMERIC)) = COALESCE(`bfcol_5`, CAST(1 AS NUMERIC))
2828
)
2929
SELECT
3030
`bfcol_2` AS `rowindex_x`,
3131
`bfcol_3` AS `numeric_col`,
32-
`bfcol_6` AS `rowindex_y`
32+
`bfcol_4` AS `rowindex_y`
3333
FROM `bfcte_4`

0 commit comments

Comments
 (0)