Skip to content

Commit b4da1cf

Browse files
authored
fix: prevent KeyError in bpd.concat with empty DF and struct/array types DF (#1568)
1 parent 8c50755 commit b4da1cf

File tree

4 files changed

+16
-4
lines changed

4 files changed

+16
-4
lines changed

bigframes/core/compile/ibis_types.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ def literal_to_ibis_scalar(
388388
# Ibis has a bug when casting nulltype to geospatial, so we perform an intermediate cast first
389389
geotype = ibis_dtypes.GeoSpatial(geotype="geography", srid=4326, nullable=True)
390390
return bigframes_vendored.ibis.literal(None, geotype)
391-
ibis_dtype = BIGFRAMES_TO_IBIS[force_dtype] if force_dtype else None
391+
392+
ibis_dtype = bigframes_dtype_to_ibis_dtype(force_dtype) if force_dtype else None
392393

393394
if pd.api.types.is_list_like(literal):
394395
if validate:

tests/system/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,14 +465,15 @@ def nested_structs_df(
465465

466466

467467
@pytest.fixture(scope="session")
468-
def nested_structs_pandas_df() -> pd.DataFrame:
468+
def nested_structs_pandas_df(nested_structs_pandas_type: pd.ArrowDtype) -> pd.DataFrame:
469469
"""pd.DataFrame pointing at test data."""
470470

471471
df = pd.read_json(
472472
DATA_DIR / "nested_structs.jsonl",
473473
lines=True,
474474
)
475475
df = df.set_index("id")
476+
df["person"] = df["person"].astype(nested_structs_pandas_type)
476477
return df
477478

478479

tests/system/small/test_pandas.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ def test_concat_dataframe(scalars_dfs, ordered):
4040
assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
4141

4242

43+
def test_concat_dataframe_w_struct_cols(nested_structs_df, nested_structs_pandas_df):
44+
"""Avoid regressions for internal issue 407107482"""
45+
empty_bf_df = bpd.DataFrame(session=nested_structs_df._block.session)
46+
bf_result = bpd.concat((empty_bf_df, nested_structs_df), ignore_index=True)
47+
bf_result = bf_result.to_pandas()
48+
pd_result = pd.concat((pd.DataFrame(), nested_structs_pandas_df), ignore_index=True)
49+
pd_result.index = pd_result.index.astype("Int64")
50+
pd.testing.assert_frame_equal(bf_result, pd_result)
51+
52+
4353
def test_concat_series(scalars_dfs):
4454
scalars_df, scalars_pandas_df = scalars_dfs
4555
bf_result = bpd.concat(

tests/system/small/test_series.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4381,13 +4381,13 @@ def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col
43814381

43824382

43834383
def test_series_struct_get_field_by_attribute(
4384-
nested_structs_df, nested_structs_pandas_df, nested_structs_pandas_type
4384+
nested_structs_df, nested_structs_pandas_df
43854385
):
43864386
if Version(pd.__version__) < Version("2.2.0"):
43874387
pytest.skip("struct accessor is not supported before pandas 2.2")
43884388

43894389
bf_series = nested_structs_df["person"]
4390-
df_series = nested_structs_pandas_df["person"].astype(nested_structs_pandas_type)
4390+
df_series = nested_structs_pandas_df["person"]
43914391

43924392
pd.testing.assert_series_equal(
43934393
bf_series.address.city.to_pandas(),

0 commit comments

Comments
 (0)