Skip to content

Commit fed8039

Browse files
authored
chore: implement add_op and del_op compilers (#1955)
1 parent 82175a4 commit fed8039

File tree

9 files changed

+364
-29
lines changed

9 files changed

+364
-29
lines changed

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from __future__ import annotations
1616

17+
import bigframes_vendored.constants as constants
1718
import sqlglot.expressions as sge
1819

1920
from bigframes import dtypes
@@ -35,8 +36,83 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
3536
# String addition
3637
return sge.Concat(expressions=[left.expr, right.expr])
3738

38-
# Numerical addition
39-
return sge.Add(this=left.expr, expression=right.expr)
39+
if dtypes.is_numeric(left.dtype) and dtypes.is_numeric(right.dtype):
40+
left_expr = left.expr
41+
if left.dtype == dtypes.BOOL_DTYPE:
42+
left_expr = sge.Cast(this=left_expr, to="INT64")
43+
right_expr = right.expr
44+
if right.dtype == dtypes.BOOL_DTYPE:
45+
right_expr = sge.Cast(this=right_expr, to="INT64")
46+
return sge.Add(this=left_expr, expression=right_expr)
47+
48+
if (
49+
dtypes.is_time_or_date_like(left.dtype)
50+
and right.dtype == dtypes.TIMEDELTA_DTYPE
51+
):
52+
left_expr = left.expr
53+
if left.dtype == dtypes.DATE_DTYPE:
54+
left_expr = sge.Cast(this=left_expr, to="DATETIME")
55+
return sge.TimestampAdd(
56+
this=left_expr, expression=right.expr, unit=sge.Var(this="MICROSECOND")
57+
)
58+
if (
59+
dtypes.is_time_or_date_like(right.dtype)
60+
and left.dtype == dtypes.TIMEDELTA_DTYPE
61+
):
62+
right_expr = right.expr
63+
if right.dtype == dtypes.DATE_DTYPE:
64+
right_expr = sge.Cast(this=right_expr, to="DATETIME")
65+
return sge.TimestampAdd(
66+
this=right_expr, expression=left.expr, unit=sge.Var(this="MICROSECOND")
67+
)
68+
if left.dtype == dtypes.TIMEDELTA_DTYPE and right.dtype == dtypes.TIMEDELTA_DTYPE:
69+
return sge.Add(this=left.expr, expression=right.expr)
70+
71+
raise TypeError(
72+
f"Cannot add type {left.dtype} and {right.dtype}. {constants.FEEDBACK_LINK}"
73+
)
74+
75+
76+
@BINARY_OP_REGISTRATION.register(ops.sub_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile the subtraction ``left - right`` into a SQLGlot expression.

    The expression that is built depends on the operand dtypes:

    * numeric - numeric: ``sge.Sub``, with BOOL operands first cast to INT64.
    * date/time-like - timedelta: ``sge.TimestampSub`` in MICROSECOND units.
    * date/time-like - date/time-like: ``sge.TimestampDiff`` in MICROSECOND
      units.
    * timedelta - timedelta: ``sge.Sub`` on the operands as-is.

    In the two date/time-like cases, DATE operands are cast to DATETIME
    before being used.

    Args:
        op: The operator being compiled; not consulted by this implementation.
        left: The minuend, carrying its SQLGlot expression and dtype.
        right: The subtrahend, carrying its SQLGlot expression and dtype.

    Returns:
        The SQLGlot expression for the subtraction.

    Raises:
        TypeError: If no rule above covers the pair of operand dtypes.
    """

    def _bool_as_int64(operand: TypedExpr) -> sge.Expression:
        # BOOL operands take part in arithmetic as INT64.
        if operand.dtype == dtypes.BOOL_DTYPE:
            return sge.Cast(this=operand.expr, to="INT64")
        return operand.expr

    def _date_as_datetime(operand: TypedExpr) -> sge.Expression:
        # DATE operands are widened to DATETIME for the timestamp functions.
        if operand.dtype == dtypes.DATE_DTYPE:
            return sge.Cast(this=operand.expr, to="DATETIME")
        return operand.expr

    lhs_dtype = left.dtype
    rhs_dtype = right.dtype

    if dtypes.is_numeric(lhs_dtype) and dtypes.is_numeric(rhs_dtype):
        return sge.Sub(
            this=_bool_as_int64(left),
            expression=_bool_as_int64(right),
        )

    if dtypes.is_time_or_date_like(lhs_dtype):
        if rhs_dtype == dtypes.TIMEDELTA_DTYPE:
            # Shift a point in time backwards by a duration.
            return sge.TimestampSub(
                this=_date_as_datetime(left),
                expression=right.expr,
                unit=sge.Var(this="MICROSECOND"),
            )
        if dtypes.is_time_or_date_like(rhs_dtype):
            # Distance between two points in time.
            return sge.TimestampDiff(
                this=_date_as_datetime(left),
                expression=_date_as_datetime(right),
                unit=sge.Var(this="MICROSECOND"),
            )

    if lhs_dtype == dtypes.TIMEDELTA_DTYPE and rhs_dtype == dtypes.TIMEDELTA_DTYPE:
        return sge.Sub(this=left.expr, expression=right.expr)

    raise TypeError(
        f"Cannot subtract type {left.dtype} and {right.dtype}. {constants.FEEDBACK_LINK}"
    )
40116

41117

42118
@BINARY_OP_REGISTRATION.register(ops.ge_op)

bigframes/dtypes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,10 @@ def is_time_like(type_: ExpressionType) -> bool:
289289
return type_ in (DATETIME_DTYPE, TIMESTAMP_DTYPE, TIME_DTYPE)
290290

291291

292+
def is_time_or_date_like(type_: ExpressionType) -> bool:
    """Return True if ``type_`` is the DATE, DATETIME, TIME, or TIMESTAMP dtype."""
    temporal_dtypes = (DATE_DTYPE, DATETIME_DTYPE, TIME_DTYPE, TIMESTAMP_DTYPE)
    return type_ in temporal_dtypes
294+
295+
292296
def is_geo_like(type_: ExpressionType) -> bool:
293297
return type_ in (GEO_DTYPE,)
294298

tests/system/small/engines/test_numeric_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def apply_op_pairwise(
5353
return new_arr
5454

5555

56-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
56+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
5757
def test_engines_project_add(
5858
scalars_array_value: array_value.ArrayValue,
5959
engine,
@@ -62,7 +62,7 @@ def test_engines_project_add(
6262
assert_equivalence_execution(arr.node, REFERENCE_ENGINE, engine)
6363

6464

65-
@pytest.mark.parametrize("engine", ["polars", "bq"], indirect=True)
65+
@pytest.mark.parametrize("engine", ["polars", "bq", "bq-sqlglot"], indirect=True)
6666
def test_engines_project_sub(
6767
scalars_array_value: array_value.ArrayValue,
6868
engine,
Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,54 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`
3+
`bool_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`rowindex` AS `bfcol_2`
46
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
57
), `bfcte_1` AS (
68
SELECT
79
*,
8-
`bfcol_0` + `bfcol_0` AS `bfcol_1`
10+
`bfcol_2` AS `bfcol_6`,
11+
`bfcol_1` AS `bfcol_7`,
12+
`bfcol_0` AS `bfcol_8`,
13+
`bfcol_1` + `bfcol_1` AS `bfcol_9`
914
FROM `bfcte_0`
15+
), `bfcte_2` AS (
16+
SELECT
17+
*,
18+
`bfcol_6` AS `bfcol_14`,
19+
`bfcol_7` AS `bfcol_15`,
20+
`bfcol_8` AS `bfcol_16`,
21+
`bfcol_9` AS `bfcol_17`,
22+
`bfcol_7` + 1 AS `bfcol_18`
23+
FROM `bfcte_1`
24+
), `bfcte_3` AS (
25+
SELECT
26+
*,
27+
`bfcol_14` AS `bfcol_24`,
28+
`bfcol_15` AS `bfcol_25`,
29+
`bfcol_16` AS `bfcol_26`,
30+
`bfcol_17` AS `bfcol_27`,
31+
`bfcol_18` AS `bfcol_28`,
32+
`bfcol_15` + CAST(`bfcol_16` AS INT64) AS `bfcol_29`
33+
FROM `bfcte_2`
34+
), `bfcte_4` AS (
35+
SELECT
36+
*,
37+
`bfcol_24` AS `bfcol_36`,
38+
`bfcol_25` AS `bfcol_37`,
39+
`bfcol_26` AS `bfcol_38`,
40+
`bfcol_27` AS `bfcol_39`,
41+
`bfcol_28` AS `bfcol_40`,
42+
`bfcol_29` AS `bfcol_41`,
43+
CAST(`bfcol_26` AS INT64) + `bfcol_25` AS `bfcol_42`
44+
FROM `bfcte_3`
1045
)
1146
SELECT
12-
`bfcol_1` AS `int64_col`
13-
FROM `bfcte_1`
47+
`bfcol_36` AS `rowindex`,
48+
`bfcol_37` AS `int64_col`,
49+
`bfcol_38` AS `bool_col`,
50+
`bfcol_39` AS `int_add_int`,
51+
`bfcol_40` AS `int_add_1`,
52+
`bfcol_41` AS `int_add_bool`,
53+
`bfcol_42` AS `bool_add_int`
54+
FROM `bfcte_4`

tests/unit/core/compile/sqlglot/expressions/snapshots/test_binary_compiler/test_add_numeric_w_scalar/out.sql

Lines changed: 0 additions & 13 deletions
This file was deleted.
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`date_col` AS `bfcol_0`,
4+
`rowindex` AS `bfcol_1`,
5+
`timestamp_col` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
`bfcol_1` AS `bfcol_6`,
11+
`bfcol_2` AS `bfcol_7`,
12+
`bfcol_0` AS `bfcol_8`,
13+
TIMESTAMP_ADD(CAST(`bfcol_0` AS DATETIME), INTERVAL 86400000000 MICROSECOND) AS `bfcol_9`
14+
FROM `bfcte_0`
15+
), `bfcte_2` AS (
16+
SELECT
17+
*,
18+
`bfcol_6` AS `bfcol_14`,
19+
`bfcol_7` AS `bfcol_15`,
20+
`bfcol_8` AS `bfcol_16`,
21+
`bfcol_9` AS `bfcol_17`,
22+
TIMESTAMP_ADD(`bfcol_7`, INTERVAL 86400000000 MICROSECOND) AS `bfcol_18`
23+
FROM `bfcte_1`
24+
), `bfcte_3` AS (
25+
SELECT
26+
*,
27+
`bfcol_14` AS `bfcol_24`,
28+
`bfcol_15` AS `bfcol_25`,
29+
`bfcol_16` AS `bfcol_26`,
30+
`bfcol_17` AS `bfcol_27`,
31+
`bfcol_18` AS `bfcol_28`,
32+
TIMESTAMP_ADD(CAST(`bfcol_16` AS DATETIME), INTERVAL 86400000000 MICROSECOND) AS `bfcol_29`
33+
FROM `bfcte_2`
34+
), `bfcte_4` AS (
35+
SELECT
36+
*,
37+
`bfcol_24` AS `bfcol_36`,
38+
`bfcol_25` AS `bfcol_37`,
39+
`bfcol_26` AS `bfcol_38`,
40+
`bfcol_27` AS `bfcol_39`,
41+
`bfcol_28` AS `bfcol_40`,
42+
`bfcol_29` AS `bfcol_41`,
43+
TIMESTAMP_ADD(`bfcol_25`, INTERVAL 86400000000 MICROSECOND) AS `bfcol_42`
44+
FROM `bfcte_3`
45+
), `bfcte_5` AS (
46+
SELECT
47+
*,
48+
172800000000 AS `bfcol_50`
49+
FROM `bfcte_4`
50+
)
51+
SELECT
52+
`bfcol_36` AS `rowindex`,
53+
`bfcol_37` AS `timestamp_col`,
54+
`bfcol_38` AS `date_col`,
55+
`bfcol_39` AS `date_add_timedelta`,
56+
`bfcol_40` AS `timestamp_add_timedelta`,
57+
`bfcol_41` AS `timedelta_add_date`,
58+
`bfcol_42` AS `timedelta_add_timestamp`,
59+
`bfcol_50` AS `timedelta_add_timedelta`
60+
FROM `bfcte_5`
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col` AS `bfcol_0`,
4+
`int64_col` AS `bfcol_1`,
5+
`rowindex` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
`bfcol_2` AS `bfcol_6`,
11+
`bfcol_1` AS `bfcol_7`,
12+
`bfcol_0` AS `bfcol_8`,
13+
`bfcol_1` - `bfcol_1` AS `bfcol_9`
14+
FROM `bfcte_0`
15+
), `bfcte_2` AS (
16+
SELECT
17+
*,
18+
`bfcol_6` AS `bfcol_14`,
19+
`bfcol_7` AS `bfcol_15`,
20+
`bfcol_8` AS `bfcol_16`,
21+
`bfcol_9` AS `bfcol_17`,
22+
`bfcol_7` - 1 AS `bfcol_18`
23+
FROM `bfcte_1`
24+
), `bfcte_3` AS (
25+
SELECT
26+
*,
27+
`bfcol_14` AS `bfcol_24`,
28+
`bfcol_15` AS `bfcol_25`,
29+
`bfcol_16` AS `bfcol_26`,
30+
`bfcol_17` AS `bfcol_27`,
31+
`bfcol_18` AS `bfcol_28`,
32+
`bfcol_15` - CAST(`bfcol_16` AS INT64) AS `bfcol_29`
33+
FROM `bfcte_2`
34+
), `bfcte_4` AS (
35+
SELECT
36+
*,
37+
`bfcol_24` AS `bfcol_36`,
38+
`bfcol_25` AS `bfcol_37`,
39+
`bfcol_26` AS `bfcol_38`,
40+
`bfcol_27` AS `bfcol_39`,
41+
`bfcol_28` AS `bfcol_40`,
42+
`bfcol_29` AS `bfcol_41`,
43+
CAST(`bfcol_26` AS INT64) - `bfcol_25` AS `bfcol_42`
44+
FROM `bfcte_3`
45+
)
46+
SELECT
47+
`bfcol_36` AS `rowindex`,
48+
`bfcol_37` AS `int64_col`,
49+
`bfcol_38` AS `bool_col`,
50+
`bfcol_39` AS `int_add_int`,
51+
`bfcol_40` AS `int_add_1`,
52+
`bfcol_41` AS `int_add_bool`,
53+
`bfcol_42` AS `bool_add_int`
54+
FROM `bfcte_4`
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`date_col` AS `bfcol_0`,
4+
`rowindex` AS `bfcol_1`,
5+
`timestamp_col` AS `bfcol_2`
6+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
7+
), `bfcte_1` AS (
8+
SELECT
9+
*,
10+
`bfcol_1` AS `bfcol_6`,
11+
`bfcol_2` AS `bfcol_7`,
12+
`bfcol_0` AS `bfcol_8`,
13+
TIMESTAMP_SUB(CAST(`bfcol_0` AS DATETIME), INTERVAL 86400000000 MICROSECOND) AS `bfcol_9`
14+
FROM `bfcte_0`
15+
), `bfcte_2` AS (
16+
SELECT
17+
*,
18+
`bfcol_6` AS `bfcol_14`,
19+
`bfcol_7` AS `bfcol_15`,
20+
`bfcol_8` AS `bfcol_16`,
21+
`bfcol_9` AS `bfcol_17`,
22+
TIMESTAMP_SUB(`bfcol_7`, INTERVAL 86400000000 MICROSECOND) AS `bfcol_18`
23+
FROM `bfcte_1`
24+
), `bfcte_3` AS (
25+
SELECT
26+
*,
27+
`bfcol_14` AS `bfcol_24`,
28+
`bfcol_15` AS `bfcol_25`,
29+
`bfcol_16` AS `bfcol_26`,
30+
`bfcol_17` AS `bfcol_27`,
31+
`bfcol_18` AS `bfcol_28`,
32+
TIMESTAMP_DIFF(CAST(`bfcol_16` AS DATETIME), CAST(`bfcol_16` AS DATETIME), MICROSECOND) AS `bfcol_29`
33+
FROM `bfcte_2`
34+
), `bfcte_4` AS (
35+
SELECT
36+
*,
37+
`bfcol_24` AS `bfcol_36`,
38+
`bfcol_25` AS `bfcol_37`,
39+
`bfcol_26` AS `bfcol_38`,
40+
`bfcol_27` AS `bfcol_39`,
41+
`bfcol_28` AS `bfcol_40`,
42+
`bfcol_29` AS `bfcol_41`,
43+
TIMESTAMP_DIFF(`bfcol_25`, `bfcol_25`, MICROSECOND) AS `bfcol_42`
44+
FROM `bfcte_3`
45+
), `bfcte_5` AS (
46+
SELECT
47+
*,
48+
0 AS `bfcol_50`
49+
FROM `bfcte_4`
50+
)
51+
SELECT
52+
`bfcol_36` AS `rowindex`,
53+
`bfcol_37` AS `timestamp_col`,
54+
`bfcol_38` AS `date_col`,
55+
`bfcol_39` AS `date_sub_timedelta`,
56+
`bfcol_40` AS `timestamp_sub_timedelta`,
57+
`bfcol_41` AS `timestamp_sub_date`,
58+
`bfcol_42` AS `date_sub_timestamp`,
59+
`bfcol_50` AS `timedelta_sub_timedelta`
60+
FROM `bfcte_5`

0 commit comments

Comments
 (0)