Skip to content

Commit e9bda37

Browse files
authored
chore: Migrate up to 15 scalar operators to SQLGlot (#1941)
Migrated the following unary scalar operators to SQLGlot:
- StrftimeOp
- UnixSeconds
- UnixMicros
- UnixMillis
- FloorDtOp
- geo_st_boundary_op
- geo_st_geogfromtext_op
- geo_st_isclosed_op
- GeoStLengthOp
- StructFieldOp
- AsTypeOp
- IsInOp
- ToDatetimeOp
- ToTimestampOp
- ToTimedeltaOp
1 parent 51057fc commit e9bda37

File tree

16 files changed

+374
-0
lines changed

16 files changed

+374
-0
lines changed

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
import typing
1818

19+
import pandas as pd
20+
import pyarrow as pa
1921
import sqlglot
2022
import sqlglot.expressions as sge
2123

@@ -105,6 +107,12 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
105107
)
106108

107109

110+
@UNARY_OP_REGISTRATION.register(ops.AsTypeOp)
def _(op: ops.AsTypeOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``AsTypeOp`` into a ``CAST`` of the operand to ``op.to_type``."""
    # TODO: Support more types for casting, such as JSON, etc.
    # NOTE(review): ``op.to_type`` is handed to SQLGlot unchanged; confirm it
    # is always a value SQLGlot can render as a SQL type name.
    target_type = op.to_type
    return sge.Cast(this=expr.expr, to=target_type)
114+
115+
108116
@UNARY_OP_REGISTRATION.register(ops.ArrayToStringOp)
def _(op: ops.ArrayToStringOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``ArrayToStringOp`` into an ``sge.ArrayToString`` node.

    The array operand is joined using ``op.delimiter`` as the separator.
    """
    # Build the delimiter as a proper string-literal node instead of
    # hand-quoting it with an f-string: a delimiter containing a quote or a
    # backslash would otherwise be spliced into the SQL text unescaped.
    delimiter = sge.convert(op.delimiter)
    return sge.ArrayToString(this=expr.expr, expression=delimiter)
@@ -234,6 +242,12 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
234242
) - sge.convert(1)
235243

236244

245+
@UNARY_OP_REGISTRATION.register(ops.FloorDtOp)
def _(op: ops.FloorDtOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``FloorDtOp`` into a ``TimestampTrunc`` of the operand."""
    # TODO: Remove this method when it is covered by ops.FloorOp
    # NOTE(review): ``op.freq`` is emitted verbatim as the truncation unit;
    # confirm callers always supply a unit name the SQL engine understands.
    truncation_unit = sge.Identifier(this=op.freq)
    return sge.TimestampTrunc(this=expr.expr, unit=truncation_unit)
249+
250+
237251
@UNARY_OP_REGISTRATION.register(ops.floor_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``floor_op`` by wrapping the operand in an ``sge.Floor`` node."""
    operand = expr.expr
    return sge.Floor(this=operand)
@@ -249,6 +263,26 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
249263
return sge.func("ST_ASTEXT", expr.expr)
250264

251265

266+
@UNARY_OP_REGISTRATION.register(ops.geo_st_boundary_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_st_boundary_op`` into a call to ``ST_BOUNDARY``."""
    geography = expr.expr
    return sge.func("ST_BOUNDARY", geography)
269+
270+
271+
@UNARY_OP_REGISTRATION.register(ops.geo_st_geogfromtext_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_st_geogfromtext_op`` into ``SAFE.ST_GEOGFROMTEXT``."""
    text_operand = expr.expr
    return sge.func("SAFE.ST_GEOGFROMTEXT", text_operand)
274+
275+
276+
@UNARY_OP_REGISTRATION.register(ops.geo_st_isclosed_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_st_isclosed_op`` into a call to ``ST_ISCLOSED``."""
    geography = expr.expr
    return sge.func("ST_ISCLOSED", geography)
279+
280+
281+
@UNARY_OP_REGISTRATION.register(ops.GeoStLengthOp)
def _(op: ops.GeoStLengthOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``GeoStLengthOp`` into a call to ``ST_LENGTH``.

    Only the operand expression is forwarded; no attribute of ``op`` is read.
    """
    geography = expr.expr
    return sge.func("ST_LENGTH", geography)
284+
285+
252286
@UNARY_OP_REGISTRATION.register(ops.geo_x_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``geo_x_op`` into a call to ``SAFE.ST_X``."""
    geography = expr.expr
    return sge.func("SAFE.ST_X", geography)
@@ -274,6 +308,11 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
274308
return sge.BitwiseNot(this=expr.expr)
275309

276310

311+
@UNARY_OP_REGISTRATION.register(ops.IsInOp)
def _(op: ops.IsInOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``IsInOp`` into a SQL ``IN`` membership test.

    Each entry of ``op.values`` is converted to a SQL literal and the operand
    is tested for membership in that list. When ``op.values`` is empty, a
    constant ``FALSE`` is returned instead.
    """
    candidates = [sge.convert(value) for value in op.values]
    if not candidates:
        # An empty list would render as ``<expr> IN ()``, which is not valid
        # SQL. Nothing can be a member of an empty set, so emit FALSE.
        return sge.convert(False)
    return sge.In(this=expr.expr, expressions=candidates)
314+
315+
277316
@UNARY_OP_REGISTRATION.register(ops.isalnum_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``isalnum_op`` into a regular-expression match on the operand.

    The pattern requires the whole value to consist of one or more characters
    from the Unicode ``N`` or ``L`` property classes.
    """
    alnum_pattern = sge.convert(r"^(\p{N}|\p{L})+$")
    return sge.RegexpLike(this=expr.expr, expression=alnum_pattern)
@@ -517,6 +556,26 @@ def _(op: ops.StrSliceOp, expr: TypedExpr) -> sge.Expression:
517556
)
518557

519558

559+
@UNARY_OP_REGISTRATION.register(ops.StrftimeOp)
def _(op: ops.StrftimeOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StrftimeOp`` into ``FORMAT_TIMESTAMP(<format>, <operand>)``.

    ``op.date_format`` is emitted as a string literal in the first argument.
    """
    # NOTE(review): FORMAT_TIMESTAMP is used regardless of ``expr.dtype``;
    # confirm whether non-timestamp temporal inputs can reach this handler.
    format_literal = sge.convert(op.date_format)
    return sge.func("FORMAT_TIMESTAMP", format_literal, expr.expr)
562+
563+
564+
@UNARY_OP_REGISTRATION.register(ops.StructFieldOp)
def _(op: ops.StructFieldOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``StructFieldOp`` into a ``<struct>.<field>`` column reference.

    ``op.name_or_index`` is used directly when it is a string; otherwise it is
    used to look the field name up in the operand's Arrow struct type.
    """
    selector = op.name_or_index
    if not isinstance(selector, str):
        # Non-string selector: resolve the field name from the struct type
        # carried by the operand's dtype.
        arrow_dtype = typing.cast(pd.ArrowDtype, expr.dtype)
        struct_type = typing.cast(pa.StructType, arrow_dtype.pyarrow_dtype)
        field_name = struct_type.field(selector).name
    else:
        field_name = selector

    # The struct expression goes in the ``catalog`` slot of the column node,
    # with the quoted field name as the column identifier.
    field_identifier = sge.to_identifier(field_name, quoted=True)
    return sge.Column(this=field_identifier, catalog=expr.expr)
577+
578+
520579
@UNARY_OP_REGISTRATION.register(ops.tan_op)
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``tan_op`` into a call to the SQL function ``TAN``."""
    operand = expr.expr
    return sge.func("TAN", operand)
@@ -537,6 +596,36 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
537596
return sge.Floor(this=expr.expr)
538597

539598

599+
@UNARY_OP_REGISTRATION.register(ops.ToDatetimeOp)
def _(op: ops.ToDatetimeOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``ToDatetimeOp`` as ``CAST(TIMESTAMP_SECONDS(<operand>) AS DATETIME)``."""
    # NOTE(review): no attribute of ``op`` is consulted, so the operand is
    # always passed to TIMESTAMP_SECONDS; confirm that other units or formats
    # never reach this handler.
    epoch_timestamp = sge.func("TIMESTAMP_SECONDS", expr.expr)
    return sge.Cast(this=epoch_timestamp, to="DATETIME")
602+
603+
604+
@UNARY_OP_REGISTRATION.register(ops.ToTimestampOp)
def _(op: ops.ToTimestampOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``ToTimestampOp`` into a call to ``TIMESTAMP_SECONDS``."""
    # NOTE(review): no attribute of ``op`` is consulted, so the operand is
    # always passed to TIMESTAMP_SECONDS; confirm that other units or formats
    # never reach this handler.
    epoch_value = expr.expr
    return sge.func("TIMESTAMP_SECONDS", epoch_value)
607+
608+
609+
@UNARY_OP_REGISTRATION.register(ops.ToTimedeltaOp)
def _(op: ops.ToTimedeltaOp, expr: TypedExpr) -> sge.Expression:
    """Compile ``ToTimedeltaOp`` into an ``sge.Interval`` with unit ``SECOND``."""
    # NOTE(review): the unit is fixed to SECOND and no attribute of ``op`` is
    # read; confirm this matches every unit the op can be built with.
    seconds_unit = sge.Identifier(this="SECOND")
    return sge.Interval(this=expr.expr, unit=seconds_unit)
612+
613+
614+
@UNARY_OP_REGISTRATION.register(ops.UnixMicros)
def _(op: ops.UnixMicros, expr: TypedExpr) -> sge.Expression:
    """Compile ``UnixMicros`` into a call to ``UNIX_MICROS``."""
    operand = expr.expr
    return sge.func("UNIX_MICROS", operand)
617+
618+
619+
@UNARY_OP_REGISTRATION.register(ops.UnixMillis)
def _(op: ops.UnixMillis, expr: TypedExpr) -> sge.Expression:
    """Compile ``UnixMillis`` into a call to ``UNIX_MILLIS``."""
    operand = expr.expr
    return sge.func("UNIX_MILLIS", operand)
622+
623+
624+
@UNARY_OP_REGISTRATION.register(ops.UnixSeconds)
def _(op: ops.UnixSeconds, expr: TypedExpr) -> sge.Expression:
    """Compile ``UnixSeconds`` into a call to ``UNIX_SECONDS``."""
    operand = expr.expr
    return sge.func("UNIX_SECONDS", operand)
627+
628+
540629
# JSON Ops
541630
@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
542631
def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`timestamp_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
TIMESTAMP_TRUNC(`bfcol_0`, DAY) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `timestamp_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`geography_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
ST_BOUNDARY(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `geography_col`
13+
FROM `bfcte_1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`string_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
SAFE.ST_GEOGFROMTEXT(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `string_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`geography_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
ST_ISCLOSED(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `geography_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`geography_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
ST_LENGTH(`bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `geography_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
`bfcol_0` IN (1, 2, 3) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `int64_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`timestamp_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
FORMAT_TIMESTAMP('%Y-%m-%d', `bfcol_0`) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `timestamp_col`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`people` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`nested_structs_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
`bfcol_0`.`name` AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `people`
13+
FROM `bfcte_1`
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`int64_col` AS `bfcol_0`
4+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
5+
), `bfcte_1` AS (
6+
SELECT
7+
*,
8+
CAST(TIMESTAMP_SECONDS(`bfcol_0`) AS DATETIME) AS `bfcol_1`
9+
FROM `bfcte_0`
10+
)
11+
SELECT
12+
`bfcol_1` AS `int64_col`
13+
FROM `bfcte_1`

0 commit comments

Comments
 (0)