Skip to content

Commit 27bbd80

Browse files
authored
feat: bigframes.bigquery.parse_json (#1265)
* feat: bigframes.bigquery.parse_json * add preview doc and warning * nit
1 parent 059a564 commit 27bbd80

File tree

7 files changed

+103
-11
lines changed

7 files changed

+103
-11
lines changed

bigframes/bigquery/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,27 @@
2727
json_extract_array,
2828
json_extract_string_array,
2929
json_set,
30+
parse_json,
3031
)
3132
from bigframes.bigquery._operations.search import create_vector_index, vector_search
3233
from bigframes.bigquery._operations.struct import struct
3334

3435
__all__ = [
36+
# approximate aggregate ops
37+
"approx_top_count",
38+
# array ops
3539
"array_length",
3640
"array_agg",
3741
"array_to_string",
42+
# json ops
3843
"json_set",
3944
"json_extract",
4045
"json_extract_array",
4146
"json_extract_string_array",
42-
"approx_top_count",
43-
"struct",
47+
"parse_json",
48+
# search ops
4449
"create_vector_index",
4550
"vector_search",
51+
# struct ops
52+
"struct",
4653
]

bigframes/bigquery/_operations/json.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,26 @@
2323

2424
from typing import Any, cast, Optional, Sequence, Tuple, Union
2525

26+
import bigframes.core.utils as utils
2627
import bigframes.dtypes
2728
import bigframes.operations as ops
2829
import bigframes.series as series
2930

3031
from . import array
3132

3233

34+
@utils.preview(name="The JSON-related API `json_set`")
3335
def json_set(
3436
input: series.Series,
3537
json_path_value_pairs: Sequence[Tuple[str, Any]],
3638
) -> series.Series:
3739
"""Produces a new JSON value within a Series by inserting or replacing values at
3840
specified paths.
3941
42+
.. warning::
43+
The JSON-related API `json_set` is in preview. Its behavior may change in
44+
future versions.
45+
4046
**Examples:**
4147
4248
>>> import bigframes.pandas as bpd
@@ -223,3 +229,37 @@ def json_extract_string_array(
223229
),
224230
)
225231
return array_series
232+
233+
234+
@utils.preview(name="The JSON-related API `parse_json`")
235+
def parse_json(
236+
input: series.Series,
237+
) -> series.Series:
238+
"""Converts a series with a JSON-formatted STRING value to a JSON value.
239+
240+
.. warning::
241+
The JSON-related API `parse_json` is in preview. Its behavior may change in
242+
future versions.
243+
244+
**Examples:**
245+
246+
>>> import bigframes.pandas as bpd
247+
>>> import bigframes.bigquery as bbq
248+
>>> bpd.options.display.progress_bar = None
249+
250+
>>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
251+
>>> s
252+
0 {"class": {"students": [{"id": 5}, {"id": 12}]}}
253+
dtype: string
254+
>>> bbq.parse_json(s)
255+
0 {"class":{"students":[{"id":5},{"id":12}]}}
256+
dtype: large_string[pyarrow]
257+
258+
Args:
259+
input (bigframes.series.Series):
260+
The Series containing JSON-formatted strings.
261+
262+
Returns:
263+
bigframes.series.Series: A new Series with the JSON value.
264+
"""
265+
return input._apply_unary_op(ops.ParseJSON())

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import bigframes_vendored.constants as constants
2121
import bigframes_vendored.ibis.expr.api as ibis_api
2222
import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
23-
import bigframes_vendored.ibis.expr.operations as ibis_ops
2423
import bigframes_vendored.ibis.expr.operations.generic as ibis_generic
2524
import bigframes_vendored.ibis.expr.operations.udf as ibis_udf
2625
import bigframes_vendored.ibis.expr.types as ibis_types
@@ -1181,13 +1180,13 @@ def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet):
11811180
)
11821181
else:
11831182
# Enabling JSON type eliminates the need for less efficient string conversions.
1184-
return ibis_ops.ToJsonString(
1183+
return to_json_string(
11851184
json_set( # type: ignore
1186-
json_obj=parse_json(x),
1185+
json_obj=parse_json(json_str=x),
11871186
json_path=op.json_path,
11881187
json_value=y,
11891188
)
1190-
).to_expr()
1189+
)
11911190

11921191

11931192
@scalar_op_compiler.register_unary_op(ops.JSONExtract, pass_op=True)
@@ -1210,6 +1209,11 @@ def json_extract_string_array_op_impl(
12101209
return json_extract_string_array(json_obj=x, json_path=op.json_path)
12111210

12121211

1212+
@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
1213+
def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
1214+
return parse_json(json_str=x)
1215+
1216+
12131217
@scalar_op_compiler.register_unary_op(ops.ToJSONString)
12141218
def to_json_string_op_impl(json_obj: ibis_types.Value):
12151219
return to_json_string(json_obj=json_obj)

bigframes/core/utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,18 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import functools
1415
import re
1516
import typing
1617
from typing import Hashable, Iterable, List
18+
import warnings
1719

1820
import bigframes_vendored.pandas.io.common as vendored_pandas_io_common
1921
import pandas as pd
2022
import typing_extensions
2123

24+
import bigframes.exceptions as exc
25+
2226
UNNAMED_COLUMN_ID = "bigframes_unnamed_column"
2327
UNNAMED_INDEX_ID = "bigframes_unnamed_index"
2428

@@ -164,3 +168,24 @@ def merge_column_labels(
164168
result_labels.append(col_label)
165169

166170
return pd.Index(result_labels)
171+
172+
173+
def warn_preview(msg=""):
174+
"""Emit a PreviewWarning carrying the given message."""
175+
warnings.warn(msg, exc.PreviewWarning)
176+
177+
178+
def preview(*, name: str):
179+
"""Decorator factory: the wrapped function emits a preview warning on every call."""
180+
181+
def decorator(func):
182+
msg = f"{name} is in preview. Its behavior may change in future versions."
183+
184+
@functools.wraps(func)
185+
def wrapper(*args, **kwargs):
186+
warn_preview(msg=msg)
187+
return func(*args, **kwargs)
188+
189+
return wrapper
190+
191+
return decorator

bigframes/operations/__init__.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,20 @@ def output_type(self, *input_types):
740740
)
741741

742742

743+
@dataclasses.dataclass(frozen=True)
744+
class ParseJSON(UnaryOp):
745+
name: typing.ClassVar[str] = "parse_json"
746+
747+
def output_type(self, *input_types):
748+
input_type = input_types[0]
749+
if input_type != dtypes.STRING_DTYPE:
750+
raise TypeError(
751+
"Input type must be an valid JSON-formatted string type."
752+
+ f" Received type: {input_type}"
753+
)
754+
return dtypes.JSON_DTYPE
755+
756+
743757
@dataclasses.dataclass(frozen=True)
744758
class ToJSONString(UnaryOp):
745759
name: typing.ClassVar[str] = "to_json_string"
@@ -754,9 +768,6 @@ def output_type(self, *input_types):
754768
return dtypes.STRING_DTYPE
755769

756770

757-
to_json_string_op = ToJSONString()
758-
759-
760771
## Blob Ops
761772
@dataclasses.dataclass(frozen=True)
762773
class ObjGetAccessUrl(UnaryOp):

bigframes/operations/blob.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ def image_blur(
110110
)
111111
dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW"))
112112

113-
src_rt = src_rt._apply_unary_op(ops.to_json_string_op)
114-
dst_rt = dst_rt._apply_unary_op(ops.to_json_string_op)
113+
src_rt = src_rt._apply_unary_op(ops.ToJSONString())
114+
dst_rt = dst_rt._apply_unary_op(ops.ToJSONString())
115115

116116
df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer")
117117
df["ksize_x"], df["ksize_y"] = ksize

tests/system/small/bigquery/test_json.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,8 @@ def test_json_in_struct():
209209
"SELECT STRUCT(JSON '{\\\"a\\\": 1}' AS data, 1 AS number) as struct_col"
210210
)
211211
assert df["struct_col"].struct.field("data")[0] == '{"a":1}'
212+
213+
214+
def test_parse_json_w_invalid_series_type():
215+
with pytest.raises(TypeError):
216+
bbq.parse_json(bpd.Series([1, 2]))

0 commit comments

Comments
 (0)