Skip to content

Commit 98bb7cd

Browse files
authored
BUG: fix error in write_dataframe when writing an empty or all-None object column with use_arrow (#512)
1 parent e3c9144 commit 98bb7cd

File tree

3 files changed

+52
-5
lines changed

3 files changed

+52
-5
lines changed

CHANGES.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
### Bug fixes
1212

1313
- Fix WKB writing on big-endian systems (#497).
14-
- Fix writing fids to e.g. GPKG file with use_arrow (#511).
14+
- Fix writing fids to e.g. GPKG file with `use_arrow` (#511).
15+
- Fix error in `write_dataframe` when writing an empty or all-None object
16+
column with `use_arrow` (#512).
1517

1618
### Packaging
1719

pyogrio/geopandas.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,15 @@ def write_dataframe(
586586

587587
table = pa.Table.from_pandas(df, preserve_index=False)
588588

589+
# GDAL cannot write Arrow null-type columns (what pyarrow infers for empty or all-None object columns), so cast them to string
590+
for field_index, field in enumerate(table.schema):
591+
if field.type == pa.null():
592+
table = table.set_column(
593+
field_index,
594+
field.with_type(pa.string()),
595+
table[field_index].cast(pa.string()),
596+
)
597+
589598
if geometry_column is not None:
590599
# ensure that the geometry column is binary (for all-null geometries,
591600
# this could be a wrong type)

pyogrio/tests/test_geopandas_io.py

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,16 +1153,34 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow):
11531153

11541154

11551155
@pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"])
1156+
@pytest.mark.parametrize(
1157+
"columns, dtype",
1158+
[
1159+
([], None),
1160+
(["col_int"], np.int64),
1161+
(["col_float"], np.float64),
1162+
(["col_object"], object),
1163+
],
1164+
)
11561165
@pytest.mark.requires_arrow_write_api
1157-
def test_write_empty_dataframe(tmp_path, ext, use_arrow):
1158-
expected = gp.GeoDataFrame(geometry=[], crs=4326)
1166+
def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow):
1167+
"""Test writing dataframe with no rows.
11591168
1169+
With use_arrow, object type columns with no rows are converted to null type columns
1170+
by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513.
1171+
"""
1172+
expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326)
11601173
filename = tmp_path / f"test{ext}"
11611174
write_dataframe(expected, filename, use_arrow=use_arrow)
11621175

11631176
assert filename.exists()
1164-
df = read_dataframe(filename)
1165-
assert_geodataframe_equal(df, expected)
1177+
df = read_dataframe(filename, use_arrow=use_arrow)
1178+
1179+
# Check result
1180+
# With pandas < 2.0, the index is created with object dtype but read back as a
1181+
# RangeIndex, so skip the index dtype check in that case.
1182+
check_index_type = True if PANDAS_GE_20 else False
1183+
assert_geodataframe_equal(df, expected, check_index_type=check_index_type)
11661184

11671185

11681186
def test_write_empty_geometry(tmp_path):
@@ -1182,6 +1200,24 @@ def test_write_empty_geometry(tmp_path):
11821200
assert_geodataframe_equal(df, expected)
11831201

11841202

1203+
@pytest.mark.requires_arrow_write_api
1204+
def test_write_None_string_column(tmp_path, use_arrow):
1205+
"""Test pandas object columns with all None values.
1206+
1207+
With use_arrow, such columns are converted to null type columns by pyarrow, but null
1208+
columns are not supported by GDAL. Added to test fix for #513.
1209+
"""
1210+
gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326)
1211+
filename = tmp_path / "test.gpkg"
1212+
1213+
write_dataframe(gdf, filename, use_arrow=use_arrow)
1214+
assert filename.exists()
1215+
1216+
result_gdf = read_dataframe(filename, use_arrow=use_arrow)
1217+
assert result_gdf.object_col.dtype == object
1218+
assert_geodataframe_equal(result_gdf, gdf)
1219+
1220+
11851221
@pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"])
11861222
@pytest.mark.requires_arrow_write_api
11871223
def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):

0 commit comments

Comments
 (0)