Skip to content

Commit 05cb7d0

Browse files
authored
feat: support bpd.Series(json_data, dtype="json") (#1882)
* feat: support bpd.Series(json_data, dtype="json")
* undo dtypes._dtype_from_string and apply json only
1 parent 52c8233 commit 05cb7d0

File tree

6 files changed

+58
-2
lines changed

6 files changed

+58
-2
lines changed

bigframes/core/indexes/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ def __new__(
8686
pd_df = pandas.DataFrame(index=data)
8787
block = df.DataFrame(pd_df, session=session)._block
8888
else:
89+
if isinstance(dtype, str) and dtype.lower() == "json":
90+
dtype = bigframes.dtypes.JSON_DTYPE
8991
pd_index = pandas.Index(data=data, dtype=dtype, name=name)
9092
pd_df = pandas.DataFrame(index=pd_index)
9193
block = df.DataFrame(pd_df, session=session)._block

bigframes/dataframe.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ def __init__(
196196
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
197197

198198
else:
199+
if isinstance(dtype, str) and dtype.lower() == "json":
200+
dtype = bigframes.dtypes.JSON_DTYPE
201+
199202
import bigframes.pandas
200203

201204
pd_dataframe = pandas.DataFrame(

bigframes/operations/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ def __init__(
121121
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
122122
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
123123
else:
124+
if isinstance(dtype, str) and dtype.lower() == "json":
125+
dtype = bigframes.dtypes.JSON_DTYPE
124126
pd_series = pd.Series(
125127
data=data,
126128
index=index, # type:ignore

tests/system/small/test_dataframe.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,26 @@ def test_df_construct_from_dict():
180180
)
181181

182182

183+
@pytest.mark.parametrize(
    "json_type",
    [
        pytest.param(dtypes.JSON_DTYPE),
        pytest.param("json"),
    ],
)
def test_df_construct_w_json_dtype(json_type):
    """Build a DataFrame from JSON-formatted strings with a JSON dtype.

    The dtype is supplied either as ``dtypes.JSON_DTYPE`` or as the string
    ``"json"``; in both cases the resulting column must report
    ``dtypes.JSON_DTYPE`` and return the stored JSON text on element access.
    """
    raw_values = ["1", "false", '["a", {"b": 1}, null]', None]

    df = dataframe.DataFrame({"json_col": raw_values}, dtype=json_type)
    json_col = df["json_col"]

    assert json_col.dtype == dtypes.JSON_DTYPE
    assert json_col[1] == "false"
201+
202+
183203
def test_df_construct_inline_respects_location(reset_default_session_and_location):
184204
# Note: This starts a thread-local session.
185205
with bpd.option_context("bigquery.location", "europe-west1"):

tests/system/small/test_index.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import pandas as pd
1919
import pytest
2020

21+
from bigframes import dtypes
2122
import bigframes.pandas as bpd
2223
from bigframes.testing.utils import assert_pandas_index_equal_ignore_index_type
2324

@@ -61,6 +62,26 @@ def test_index_construct_from_index():
6162
pd.testing.assert_index_equal(bf_result, pd_result)
6263

6364

65+
@pytest.mark.parametrize(
    "json_type",
    [
        pytest.param(dtypes.JSON_DTYPE),
        pytest.param("json"),
    ],
)
def test_index_construct_w_json_dtype(json_type):
    """Build an Index from JSON-formatted strings with a JSON dtype.

    The dtype is supplied either as ``dtypes.JSON_DTYPE`` or as the string
    ``"json"``; in both cases the index must report ``dtypes.JSON_DTYPE``
    and return the stored JSON text on element access.
    """
    raw_values = ["1", "false", '["a", {"b": 1}, null]', None]

    json_index = bpd.Index(raw_values, dtype=json_type)

    assert json_index.dtype == dtypes.JSON_DTYPE
    assert json_index[1] == "false"
83+
84+
6485
def test_get_index(scalars_df_index, scalars_pandas_df_index):
6586
index = scalars_df_index.index
6687
bf_result = index.to_pandas()

tests/system/small/test_series.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,14 @@ def test_series_construct_local_unordered_has_sequential_index(unordered_session
326326
pd.testing.assert_index_equal(series.index.to_pandas(), expected)
327327

328328

329-
def test_series_construct_w_dtype_for_json():
329+
@pytest.mark.parametrize(
330+
("json_type"),
331+
[
332+
pytest.param(dtypes.JSON_DTYPE),
333+
pytest.param("json"),
334+
],
335+
)
336+
def test_series_construct_w_json_dtype(json_type):
330337
data = [
331338
"1",
332339
'"str"',
@@ -335,8 +342,9 @@ def test_series_construct_w_dtype_for_json():
335342
None,
336343
'{"a": {"b": [1, 2, 3], "c": true}}',
337344
]
338-
s = bigframes.pandas.Series(data, dtype=dtypes.JSON_DTYPE)
345+
s = bigframes.pandas.Series(data, dtype=json_type)
339346

347+
assert s.dtype == dtypes.JSON_DTYPE
340348
assert s[0] == "1"
341349
assert s[1] == '"str"'
342350
assert s[2] == "false"

0 commit comments

Comments
 (0)