Skip to content

Commit e1e1141

Browse files
authored
refactor: fix isin, explode and geo ops for sqlglot compiler (#2366)
This change can resolve the following bugs in #2248: - fix `test_isin_*` tests failures. - fix `test_*explode*` tests failures. - fix `test_geo_x_non_point` tests failure. Fixes internal issue 417774347🦕
1 parent cd3df33 commit e1e1141

File tree

9 files changed

+47
-32
lines changed

9 files changed

+47
-32
lines changed

bigframes/core/compile/sqlglot/expressions/comparison_ops.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,23 @@
3131
@register_unary_op(ops.IsInOp, pass_op=True)
3232
def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression:
3333
values = []
34-
is_numeric_expr = dtypes.is_numeric(expr.dtype)
34+
is_numeric_expr = dtypes.is_numeric(expr.dtype, include_bool=False)
3535
for value in op.values:
36-
if value is None:
36+
if _is_null(value):
3737
continue
3838
dtype = dtypes.bigframes_type(type(value))
39-
if expr.dtype == dtype or is_numeric_expr and dtypes.is_numeric(dtype):
39+
if (
40+
expr.dtype == dtype
41+
or is_numeric_expr
42+
and dtypes.is_numeric(dtype, include_bool=False)
43+
):
4044
values.append(sge.convert(value))
4145

4246
if op.match_nulls:
4347
contains_nulls = any(_is_null(value) for value in op.values)
4448
if contains_nulls:
49+
if len(values) == 0:
50+
return sge.Is(this=expr.expr, expression=sge.Null())
4551
return sge.Is(this=expr.expr, expression=sge.Null()) | sge.In(
4652
this=expr.expr, expressions=values
4753
)

bigframes/core/compile/sqlglot/expressions/geo_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,12 +108,12 @@ def _(expr: TypedExpr, op: ops.GeoStSimplifyOp) -> sge.Expression:
108108

109109
@register_unary_op(ops.geo_x_op)
110110
def _(expr: TypedExpr) -> sge.Expression:
111-
return sge.func("SAFE.ST_X", expr.expr)
111+
return sge.func("ST_X", expr.expr)
112112

113113

114114
@register_unary_op(ops.geo_y_op)
115115
def _(expr: TypedExpr) -> sge.Expression:
116-
return sge.func("SAFE.ST_Y", expr.expr)
116+
return sge.func("ST_Y", expr.expr)
117117

118118

119119
@register_binary_op(ops.GeoStDistanceOp, pass_op=True)

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -558,16 +558,15 @@ def _explode_single_column(
558558
)
559559
selection = sge.Star(replace=[unnested_column_alias.as_(column)])
560560

561-
# TODO: "CROSS" if not keep_empty else "LEFT"
562-
# TODO: overlaps_with_parent to replace existing column.
563561
new_expr = _select_to_cte(
564562
self.expr,
565563
sge.to_identifier(
566564
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
567565
),
568566
)
567+
# Use LEFT JOIN to preserve rows when unnesting empty arrays.
569568
new_expr = new_expr.select(selection, append=False).join(
570-
unnest_expr, join_type="CROSS"
569+
unnest_expr, join_type="LEFT"
571570
)
572571
return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
573572

@@ -621,8 +620,9 @@ def _explode_multiple_columns(
621620
next(self.uid_gen.get_uid_stream("bfcte_")), quoted=self.quoted
622621
),
623622
)
623+
# Use LEFT JOIN to preserve rows when unnesting empty arrays.
624624
new_expr = new_expr.select(selection, append=False).join(
625-
unnest_expr, join_type="CROSS"
625+
unnest_expr, join_type="LEFT"
626626
)
627627
return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
628628

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,35 @@
11
WITH `bfcte_0` AS (
22
SELECT
3+
`bool_col`,
34
`float64_col`,
45
`int64_col`
56
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
67
), `bfcte_1` AS (
78
SELECT
89
*,
9-
COALESCE(`int64_col` IN (1, 2, 3), FALSE) AS `bfcol_2`,
10-
(
11-
`int64_col` IS NULL
12-
) OR `int64_col` IN (123456) AS `bfcol_3`,
13-
COALESCE(`int64_col` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_4`,
14-
FALSE AS `bfcol_5`,
15-
COALESCE(`int64_col` IN (2.5, 3), FALSE) AS `bfcol_6`,
10+
COALESCE(`bool_col` IN (TRUE, FALSE), FALSE) AS `bfcol_3`,
11+
COALESCE(`int64_col` IN (1, 2, 3), FALSE) AS `bfcol_4`,
12+
`int64_col` IS NULL AS `bfcol_5`,
13+
COALESCE(`int64_col` IN (1.0, 2.0, 3.0), FALSE) AS `bfcol_6`,
1614
FALSE AS `bfcol_7`,
17-
COALESCE(`int64_col` IN (123456), FALSE) AS `bfcol_8`,
15+
COALESCE(`int64_col` IN (2.5, 3), FALSE) AS `bfcol_8`,
16+
FALSE AS `bfcol_9`,
17+
FALSE AS `bfcol_10`,
18+
COALESCE(`int64_col` IN (123456), FALSE) AS `bfcol_11`,
1819
(
1920
`float64_col` IS NULL
20-
) OR `float64_col` IN (1, 2, 3) AS `bfcol_9`
21+
) OR `float64_col` IN (1, 2, 3) AS `bfcol_12`
2122
FROM `bfcte_0`
2223
)
2324
SELECT
24-
`bfcol_2` AS `ints`,
25-
`bfcol_3` AS `ints_w_null`,
26-
`bfcol_4` AS `floats`,
27-
`bfcol_5` AS `strings`,
28-
`bfcol_6` AS `mixed`,
29-
`bfcol_7` AS `empty`,
30-
`bfcol_8` AS `ints_wo_match_nulls`,
31-
`bfcol_9` AS `float_in_ints`
25+
`bfcol_3` AS `bools`,
26+
`bfcol_4` AS `ints`,
27+
`bfcol_5` AS `ints_w_null`,
28+
`bfcol_6` AS `floats`,
29+
`bfcol_7` AS `strings`,
30+
`bfcol_8` AS `mixed`,
31+
`bfcol_9` AS `empty`,
32+
`bfcol_10` AS `empty_wo_match_nulls`,
33+
`bfcol_11` AS `ints_wo_match_nulls`,
34+
`bfcol_12` AS `float_in_ints`
3235
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_x/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
SAFE.ST_X(`geography_col`) AS `bfcol_1`
8+
ST_X(`geography_col`) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/expressions/snapshots/test_geo_ops/test_geo_y/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ WITH `bfcte_0` AS (
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
SAFE.ST_Y(`geography_col`) AS `bfcol_1`
8+
ST_Y(`geography_col`) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/expressions/test_comparison_ops.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import pandas as pd
1516
import pytest
1617

1718
from bigframes import operations as ops
@@ -22,18 +23,23 @@
2223

2324

2425
def test_is_in(scalar_types_df: bpd.DataFrame, snapshot):
26+
bool_col = "bool_col"
2527
int_col = "int64_col"
2628
float_col = "float64_col"
27-
bf_df = scalar_types_df[[int_col, float_col]]
29+
bf_df = scalar_types_df[[bool_col, int_col, float_col]]
2830
ops_map = {
31+
"bools": ops.IsInOp(values=(True, False)).as_expr(bool_col),
2932
"ints": ops.IsInOp(values=(1, 2, 3)).as_expr(int_col),
30-
"ints_w_null": ops.IsInOp(values=(None, 123456)).as_expr(int_col),
33+
"ints_w_null": ops.IsInOp(values=(None, pd.NA)).as_expr(int_col),
3134
"floats": ops.IsInOp(values=(1.0, 2.0, 3.0), match_nulls=False).as_expr(
3235
int_col
3336
),
3437
"strings": ops.IsInOp(values=("1.0", "2.0")).as_expr(int_col),
3538
"mixed": ops.IsInOp(values=("1.0", 2.5, 3)).as_expr(int_col),
3639
"empty": ops.IsInOp(values=()).as_expr(int_col),
40+
"empty_wo_match_nulls": ops.IsInOp(values=(), match_nulls=False).as_expr(
41+
int_col
42+
),
3743
"ints_wo_match_nulls": ops.IsInOp(
3844
values=(None, 123456), match_nulls=False
3945
).as_expr(int_col),

tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_dataframe/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ WITH `bfcte_0` AS (
99
*
1010
REPLACE (`int_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `int_list_col`, `string_list_col`[SAFE_OFFSET(`bfcol_13`)] AS `string_list_col`)
1111
FROM `bfcte_0`
12-
CROSS JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7`
12+
LEFT JOIN UNNEST(GENERATE_ARRAY(0, LEAST(ARRAY_LENGTH(`int_list_col`) - 1, ARRAY_LENGTH(`string_list_col`) - 1))) AS `bfcol_13` WITH OFFSET AS `bfcol_7`
1313
)
1414
SELECT
1515
`rowindex`,

tests/unit/core/compile/sqlglot/snapshots/test_compile_explode/test_compile_explode_series/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ WITH `bfcte_0` AS (
88
*
99
REPLACE (`bfcol_8` AS `int_list_col`)
1010
FROM `bfcte_0`
11-
CROSS JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4`
11+
LEFT JOIN UNNEST(`int_list_col`) AS `bfcol_8` WITH OFFSET AS `bfcol_4`
1212
)
1313
SELECT
1414
`rowindex`,

0 commit comments

Comments
 (0)