Skip to content

Commit 0581a2a

Browse files
authored
refactor: add json operators to SQLGlot compiler (#1887)
1 parent 8715105 commit 0581a2a

File tree

7 files changed

+152
-2
lines changed

7 files changed

+152
-2
lines changed

bigframes/core/compile/sqlglot/expressions/binary_compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,8 @@ def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
4242
@BINARY_OP_REGISTRATION.register(ops.ge_op)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``ops.ge_op`` into a SQLGlot ``GTE`` node.

    Args:
        op: The op being compiled (not read by this compiler).
        left: Operand whose ``expr`` becomes the ``this`` side.
        right: Operand whose ``expr`` becomes the ``expression`` side.

    Returns:
        An ``sge.GTE`` node over the two operand expressions.
    """
    lhs, rhs = left.expr, right.expr
    return sge.GTE(this=lhs, expression=rhs)
45+
46+
47+
@BINARY_OP_REGISTRATION.register(ops.JSONSet)
def _(op, left: TypedExpr, right: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONSet`` into a ``JSON_SET`` function call.

    Args:
        op: The op being compiled; its ``json_path`` is passed through
            ``sge.convert`` and used as the second argument.
        left: Operand whose ``expr`` is the first argument.
        right: Operand whose ``expr`` is the last argument.

    Returns:
        The expression built by ``sge.func`` for ``JSON_SET``.
    """
    call_args = (left.expr, sge.convert(op.json_path), right.expr)
    return sge.func("JSON_SET", *call_args)

bigframes/core/compile/sqlglot/expressions/unary_compiler.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,49 @@ def _(op: ops.ArraySliceOp, expr: TypedExpr) -> sge.Expression:
7070
)
7171

7272
return sge.array(selected_elements)
73+
74+
75+
# JSON Ops
76+
@UNARY_OP_REGISTRATION.register(ops.JSONExtract)
def _(op: ops.JSONExtract, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONExtract`` into a ``JSON_EXTRACT`` function call.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    operand = expr.expr
    json_path = sge.convert(op.json_path)
    return sge.func("JSON_EXTRACT", operand, json_path)
79+
80+
81+
@UNARY_OP_REGISTRATION.register(ops.JSONExtractArray)
def _(op: ops.JSONExtractArray, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONExtractArray`` into ``JSON_EXTRACT_ARRAY``.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    call_args = (expr.expr, sge.convert(op.json_path))
    return sge.func("JSON_EXTRACT_ARRAY", *call_args)
84+
85+
86+
@UNARY_OP_REGISTRATION.register(ops.JSONExtractStringArray)
def _(op: ops.JSONExtractStringArray, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONExtractStringArray`` into ``JSON_EXTRACT_STRING_ARRAY``.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    operand = expr.expr
    json_path = sge.convert(op.json_path)
    return sge.func("JSON_EXTRACT_STRING_ARRAY", operand, json_path)
89+
90+
91+
@UNARY_OP_REGISTRATION.register(ops.JSONQuery)
def _(op: ops.JSONQuery, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONQuery`` into a ``JSON_QUERY`` function call.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    call_args = (expr.expr, sge.convert(op.json_path))
    return sge.func("JSON_QUERY", *call_args)
94+
95+
96+
@UNARY_OP_REGISTRATION.register(ops.JSONQueryArray)
def _(op: ops.JSONQueryArray, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONQueryArray`` into a ``JSON_QUERY_ARRAY`` function call.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    operand = expr.expr
    json_path = sge.convert(op.json_path)
    return sge.func("JSON_QUERY_ARRAY", operand, json_path)
99+
100+
101+
@UNARY_OP_REGISTRATION.register(ops.JSONValue)
def _(op: ops.JSONValue, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONValue`` into a ``JSON_VALUE`` function call.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    call_args = (expr.expr, sge.convert(op.json_path))
    return sge.func("JSON_VALUE", *call_args)
104+
105+
106+
@UNARY_OP_REGISTRATION.register(ops.JSONValueArray)
def _(op: ops.JSONValueArray, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.JSONValueArray`` into a ``JSON_VALUE_ARRAY`` function call.

    The call receives the operand expression followed by ``op.json_path``
    converted with ``sge.convert``.
    """
    operand = expr.expr
    json_path = sge.convert(op.json_path)
    return sge.func("JSON_VALUE_ARRAY", operand, json_path)
109+
110+
111+
@UNARY_OP_REGISTRATION.register(ops.ParseJSON)
def _(op: ops.ParseJSON, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.ParseJSON`` into a single-argument ``PARSE_JSON`` call.

    ``op`` carries no attributes that are read here; only the operand
    expression is forwarded.
    """
    operand = expr.expr
    return sge.func("PARSE_JSON", operand)
114+
115+
116+
@UNARY_OP_REGISTRATION.register(ops.ToJSONString)
def _(op: ops.ToJSONString, expr: TypedExpr) -> sge.Expression:
    """Compile ``ops.ToJSONString`` into a single-argument ``TO_JSON_STRING`` call.

    ``op`` carries no attributes that are read here; only the operand
    expression is forwarded.
    """
    operand = expr.expr
    return sge.func("TO_JSON_STRING", operand)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`json_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`json_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
JSON_SET(`bfcol_1`, '$.a', 100) AS `bfcol_4`
10+
FROM `bfcte_0`
11+
), `bfcte_2` AS (
12+
SELECT
13+
*,
14+
JSON_SET(`bfcol_4`, '$.b', 'hi') AS `bfcol_7`
15+
FROM `bfcte_1`
16+
)
17+
SELECT
18+
`bfcol_0` AS `rowindex`,
19+
`bfcol_7` AS `json_col`
20+
FROM `bfcte_2`
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`json_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`json_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
JSON_EXTRACT(`bfcol_1`, '$') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_0` AS `rowindex`,
14+
`bfcol_4` AS `json_col`
15+
FROM `bfcte_1`
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`rowindex` AS `bfcol_0`,
4+
`string_col` AS `bfcol_1`
5+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
6+
), `bfcte_1` AS (
7+
SELECT
8+
*,
9+
JSON_VALUE(`bfcol_1`, '$') AS `bfcol_4`
10+
FROM `bfcte_0`
11+
)
12+
SELECT
13+
`bfcol_0` AS `rowindex`,
14+
`bfcol_4` AS `string_col`
15+
FROM `bfcte_1`

tests/unit/core/compile/sqlglot/expressions/test_binary_compiler.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import pytest
1616

17+
import bigframes.bigquery as bbq
1718
import bigframes.pandas as bpd
1819

1920
pytest.importorskip("pytest_snapshot")
@@ -41,3 +42,8 @@ def test_add_string(scalar_types_df: bpd.DataFrame, snapshot):
4142
bf_df["string_col"] = bf_df["string_col"] + "a"
4243

4344
snapshot.assert_match(bf_df.sql, "out.sql")
45+
46+
47+
def test_json_set(json_types_df: bpd.DataFrame, snapshot):
    """Snapshot the SQL compiled for ``bbq.json_set`` with two path/value pairs."""
    json_path_value_pairs = [("$.a", 100), ("$.b", "hi")]
    updated = bbq.json_set(json_types_df["json_col"], json_path_value_pairs)
    compiled_sql = updated.to_frame().sql
    snapshot.assert_match(compiled_sql, "out.sql")

tests/unit/core/compile/sqlglot/expressions/test_unary_compiler.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@
1414

1515
import pytest
1616

17-
from bigframes import bigquery
17+
import bigframes.bigquery as bbq
1818
import bigframes.pandas as bpd
1919

2020
pytest.importorskip("pytest_snapshot")
2121

2222

2323
def test_array_to_string(repeated_types_df: bpd.DataFrame, snapshot):
24-
result = bigquery.array_to_string(repeated_types_df["string_list_col"], ".")
24+
result = bbq.array_to_string(repeated_types_df["string_list_col"], ".")
2525

2626
snapshot.assert_match(result.to_frame().sql, "out.sql")
2727

@@ -42,3 +42,46 @@ def test_array_slice_with_start_and_stop(repeated_types_df: bpd.DataFrame, snaps
4242
result = repeated_types_df["string_list_col"].list[1:5]
4343

4444
snapshot.assert_match(result.to_frame().sql, "out.sql")
45+
46+
47+
# JSON Ops
48+
def test_json_extract(json_types_df: bpd.DataFrame, snapshot):
    """Check the ``JSON_EXTRACT`` fragment and snapshot the full compiled SQL."""
    extracted = bbq.json_extract(json_types_df["json_col"], "$")
    compiled_sql = extracted.to_frame().sql

    # Quick substring check on the projected column before the full comparison.
    assert "JSON_EXTRACT(`bfcol_1`, '$') AS `bfcol_4`" in compiled_sql
    snapshot.assert_match(compiled_sql, "out.sql")
53+
54+
55+
def test_json_extract_array(json_types_df: bpd.DataFrame):
    """Check that ``bbq.json_extract_array`` compiles to ``JSON_EXTRACT_ARRAY``."""
    extracted = bbq.json_extract_array(json_types_df["json_col"], "$")
    compiled_sql = extracted.to_frame().sql
    assert "JSON_EXTRACT_ARRAY(`bfcol_1`, '$') AS `bfcol_4`" in compiled_sql
59+
60+
61+
def test_json_extract_string_array(json_types_df: bpd.DataFrame):
    """Check that ``bbq.json_extract_string_array`` compiles to ``JSON_EXTRACT_STRING_ARRAY``."""
    extracted = bbq.json_extract_string_array(json_types_df["json_col"], "$")
    compiled_sql = extracted.to_frame().sql
    assert "JSON_EXTRACT_STRING_ARRAY(`bfcol_1`, '$') AS `bfcol_4`" in compiled_sql
65+
66+
67+
def test_json_query(json_types_df: bpd.DataFrame):
    """Check that ``bbq.json_query`` compiles to ``JSON_QUERY``."""
    queried = bbq.json_query(json_types_df["json_col"], "$")
    compiled_sql = queried.to_frame().sql
    assert "JSON_QUERY(`bfcol_1`, '$') AS `bfcol_4`" in compiled_sql
71+
72+
73+
def test_json_query_array(json_types_df: bpd.DataFrame):
    """Check that ``bbq.json_query_array`` compiles to ``JSON_QUERY_ARRAY``."""
    queried = bbq.json_query_array(json_types_df["json_col"], "$")
    compiled_sql = queried.to_frame().sql
    assert "JSON_QUERY_ARRAY(`bfcol_1`, '$') AS `bfcol_4`" in compiled_sql
77+
78+
79+
def test_json_value(json_types_df: bpd.DataFrame):
    """Check that ``bbq.json_value`` compiles to ``JSON_VALUE``."""
    value = bbq.json_value(json_types_df["json_col"], "$")
    compiled_sql = value.to_frame().sql
    assert "JSON_VALUE(`bfcol_1`, '$') AS `bfcol_4`" in compiled_sql
83+
84+
85+
def test_parse_json(scalar_types_df: bpd.DataFrame, snapshot):
    """Snapshot the SQL compiled for ``bbq.parse_json`` on a string column.

    This test previously called ``bbq.json_value``, so the ``PARSE_JSON``
    compilation path was never exercised and the stored snapshot recorded a
    ``JSON_VALUE`` call instead.

    NOTE: the stored ``out.sql`` snapshot for this test must be regenerated
    (``pytest --snapshot-update``) after this change.
    """
    result = bbq.parse_json(scalar_types_df["string_col"])
    snapshot.assert_match(result.to_frame().sql, "out.sql")

0 commit comments

Comments
 (0)