Skip to content

Commit 3a4be75

Browse files
authored
chore: implement blob scalar ops for sqlglot compilers (#1989)
1 parent 68f1d22 commit 3a4be75

File tree

8 files changed

+124
-28
lines changed

8 files changed

+124
-28
lines changed

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,8 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
158158
raise TypeError(
159159
f"Cannot subtract type {left.dtype} and {right.dtype}. {constants.FEEDBACK_LINK}"
160160
)
161+
162+
163+
@BINARY_OP_REGISTRATION.register(ops.obj_make_ref_op)
164+
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
165+
return sge.func("OBJ.MAKE_REF", left.expr, right.expr)

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,26 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
347347
)
348348

349349

350+
@UNARY_OP_REGISTRATION.register(ops.iso_day_op)
351+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
352+
return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
353+
354+
355+
@UNARY_OP_REGISTRATION.register(ops.iso_week_op)
356+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
357+
return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)
358+
359+
360+
@UNARY_OP_REGISTRATION.register(ops.iso_year_op)
361+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
362+
return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr)
363+
364+
365+
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
366+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
367+
return sge.Is(this=expr.expr, expression=sge.Null())
368+
369+
350370
@UNARY_OP_REGISTRATION.register(ops.isnumeric_op)
351371
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
352372
return sge.RegexpLike(this=expr.expr, expression=sge.convert(r"^\pN+$"))
@@ -445,6 +465,21 @@ def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
445465
return sge.TimestampTrunc(this=expr.expr, unit=sge.Identifier(this="DAY"))
446466

447467

468+
@UNARY_OP_REGISTRATION.register(ops.notnull_op)
469+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
470+
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
471+
472+
473+
@UNARY_OP_REGISTRATION.register(ops.obj_fetch_metadata_op)
474+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
475+
return sge.func("OBJ.FETCH_METADATA", expr.expr)
476+
477+
478+
@UNARY_OP_REGISTRATION.register(ops.ObjGetAccessUrl)
479+
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
480+
return sge.func("OBJ.GET_ACCESS_URL", expr.expr)
481+
482+
448483
@UNARY_OP_REGISTRATION.register(ops.pos_op)
449484
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
450485
return expr.expr
@@ -488,31 +523,6 @@ def _(op: ops.StrStripOp, expr: TypedExpr) -> sge.Expression:
488523
return sge.Trim(this=sge.convert(op.to_strip), expression=expr.expr)
489524

490525

491-
@UNARY_OP_REGISTRATION.register(ops.iso_day_op)
492-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
493-
return sge.Extract(this=sge.Identifier(this="DAYOFWEEK"), expression=expr.expr)
494-
495-
496-
@UNARY_OP_REGISTRATION.register(ops.iso_week_op)
497-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
498-
return sge.Extract(this=sge.Identifier(this="ISOWEEK"), expression=expr.expr)
499-
500-
501-
@UNARY_OP_REGISTRATION.register(ops.iso_year_op)
502-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
503-
return sge.Extract(this=sge.Identifier(this="ISOYEAR"), expression=expr.expr)
504-
505-
506-
@UNARY_OP_REGISTRATION.register(ops.isnull_op)
507-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
508-
return sge.Is(this=expr.expr, expression=sge.Null())
509-
510-
511-
@UNARY_OP_REGISTRATION.register(ops.notnull_op)
512-
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
513-
return sge.Not(this=sge.Is(this=expr.expr, expression=sge.Null()))
514-
515-
516526
@UNARY_OP_REGISTRATION.register(ops.sin_op)
517527
def _(op: ops.base_ops.UnaryOp, expr: TypedExpr) -> sge.Expression:
518528
return sge.func("SIN", expr.expr)
@@ -626,7 +636,6 @@ def _(op: ops.UnixSeconds, expr: TypedExpr) -> sge.Expression:
626636
return sge.func("UNIX_SECONDS", expr.expr)
627637

628638

629-
# JSON Ops
630639
@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
631640
def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
632641
return sge.func("JSON_EXTRACT", expr.expr, sge.convert(op.json_path))

bigframes/operations/blob_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,11 @@ def output_type(self, *input_types):
3636

3737
@dataclasses.dataclass(frozen=True)
3838
class ObjMakeRef(base_ops.BinaryOp):
39-
name: typing.ClassVar[str] = "obj.make_ref"
39+
name: typing.ClassVar[str] = "obj_make_ref"
4040

4141
def output_type(self, *input_types):
4242
if not all(map(dtypes.is_string_like, input_types)):
43-
raise TypeError("obj.make_ref requires string-like arguments")
43+
raise TypeError("obj_make_ref requires string-like arguments")
4444

4545
return dtypes.OBJ_REF_DTYPE
4646

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_0` AS `rowindex`,
14+
`bfcol_4` AS `string_col`
15+
FROM `bfcte_1`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
), `bfcte_2` AS (
12+
SELECT
13+
*,
14+
OBJ.FETCH_METADATA(`bfcol_4`) AS `bfcol_7`
15+
FROM `bfcte_1`
16+
), `bfcte_3` AS (
17+
SELECT
18+
*,
19+
`bfcol_7`.`version` AS `bfcol_10`
20+
FROM `bfcte_2`
21+
)
22+
SELECT
23+
`bfcol_0` AS `rowindex`,
24+
`bfcol_10` AS `version`
25+
FROM `bfcte_3`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
OBJ.MAKE_REF(`bfcol_1`, 'bigframes-dev.test-region.bigframes-default-connection') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
), `bfcte_2` AS (
12+
SELECT
13+
*,
14+
OBJ.GET_ACCESS_URL(`bfcol_4`) AS `bfcol_7`
15+
FROM `bfcte_1`
16+
), `bfcte_3` AS (
17+
SELECT
18+
*,
19+
JSON_VALUE(`bfcol_7`, '$.access_urls.read_url') AS `bfcol_10`
20+
FROM `bfcte_2`
21+
)
22+
SELECT
23+
`bfcol_0` AS `rowindex`,
24+
`bfcol_10` AS `string_col`
25+
FROM `bfcte_3`

tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,3 +164,8 @@ def test_mul_timedelta(scalar_types_df: bpd.DataFrame, snapshot):
164164
bf_df["numeric_mul_timedelta"] = bf_df["int64_col"] * timedelta
165165

166166
snapshot.assert_match(bf_df.sql, "out.sql")
167+
168+
169+
def test_obj_make_ref(scalar_types_df: bpd.DataFrame, snapshot):
170+
blob_df = scalar_types_df["string_col"].str.to_blob()
171+
snapshot.assert_match(blob_df.to_frame().sql, "out.sql")

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,18 @@ def test_normalize(scalar_types_df: bpd.DataFrame, snapshot):
405405
snapshot.assert_match(sql, "out.sql")
406406

407407

408+
def test_obj_fetch_metadata(scalar_types_df: bpd.DataFrame, snapshot):
409+
blob_s = scalar_types_df["string_col"].str.to_blob()
410+
sql = blob_s.blob.version().to_frame().sql
411+
snapshot.assert_match(sql, "out.sql")
412+
413+
414+
def test_obj_get_access_url(scalar_types_df: bpd.DataFrame, snapshot):
415+
blob_s = scalar_types_df["string_col"].str.to_blob()
416+
sql = blob_s.blob.read_url().to_frame().sql
417+
snapshot.assert_match(sql, "out.sql")
418+
419+
408420
def test_pos(scalar_types_df: bpd.DataFrame, snapshot):
409421
bf_df = scalar_types_df[["float64_col"]]
410422
sql = _apply_unary_op(bf_df, ops.pos_op, "float64_col")

0 commit comments

Comments
 (0)