Skip to content

Commit 8020035

Browse files
BUG: fix issue reading with use_arrow=True after having read a Parquet file (#601)
1 parent 45d2dca commit 8020035

File tree

5 files changed

+37
-2
lines changed

5 files changed

+37
-2
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
- Fix decode error reading an sqlite file on windows (#568).
1919
- Fix wrong layername when creating .gpkg.zip file (#570).
2020
- Fix segfault on providing an invalid value for `layer` in `read_info` (#564).
21+
- Fix error when reading data with `use_arrow=True` after having used the
22+
Parquet driver with GDAL>=3.12 (#601).
2123

2224
### Packaging
2325

ci/envs/latest.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ channels:
44
dependencies:
55
- numpy
66
- libgdal-core
7+
- libgdal-arrow-parquet
78
- pytest
89
- shapely>=2
910
- geopandas-base

pyogrio/geopandas.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pyogrio._compat import (
1010
HAS_GEOPANDAS,
11+
HAS_PYARROW,
1112
PANDAS_GE_15,
1213
PANDAS_GE_20,
1314
PANDAS_GE_22,
@@ -346,8 +347,18 @@ def read_dataframe(
346347
elif geometry_name in df.columns:
347348
wkb_values = df.pop(geometry_name)
348349
if PANDAS_GE_15 and wkb_values.dtype != object:
349-
# for example ArrowDtype will otherwise create numpy array with pd.NA
350-
wkb_values = wkb_values.to_numpy(na_value=None)
350+
if (
351+
HAS_PYARROW
352+
and isinstance(wkb_values.dtype, pd.ArrowDtype)
353+
and isinstance(wkb_values.dtype.pyarrow_dtype, pa.BaseExtensionType)
354+
):
355+
# handle BaseExtensionType(extension<geoarrow.wkb>)
356+
wkb_values = pa.array(wkb_values.array).to_numpy(
357+
zero_copy_only=False
358+
)
359+
else:
360+
# for example ArrowDtype will otherwise give numpy array with pd.NA
361+
wkb_values = wkb_values.to_numpy(na_value=None)
351362
df["geometry"] = shapely.from_wkb(wkb_values, on_invalid=on_invalid)
352363
if force_2d:
353364
df["geometry"] = shapely.force_2d(df["geometry"])

pyogrio/tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
".shp": 0,
5252
}
5353

54+
GDAL_HAS_PARQUET_DRIVER = "Parquet" in list_drivers()
55+
5456

5557
def pytest_report_header(config):
5658
drivers = ", ".join(

pyogrio/tests/test_geopandas_io.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from pyogrio.tests.conftest import (
3636
ALL_EXTS,
3737
DRIVERS,
38+
GDAL_HAS_PARQUET_DRIVER,
3839
START_FID,
3940
requires_arrow_write_api,
4041
requires_gdal_geos,
@@ -2644,3 +2645,21 @@ def test_write_geojson_rfc7946_coordinates(tmp_path, use_arrow):
26442645

26452646
gdf_in_appended = read_dataframe(output_path, use_arrow=use_arrow)
26462647
assert np.array_equal(gdf_in_appended.geometry.values, points + points_append)
2648+
2649+
2650+
@pytest.mark.requires_arrow_api
2651+
@pytest.mark.skipif(
2652+
not GDAL_HAS_PARQUET_DRIVER, reason="Parquet driver is not available"
2653+
)
2654+
def test_parquet_driver(tmp_path, use_arrow):
2655+
"""
2656+
Simple test verifying the Parquet driver works if available
2657+
"""
2658+
gdf = gp.GeoDataFrame(
2659+
{"col": [1, 2, 3], "geometry": [Point(0, 0), Point(1, 1), Point(2, 2)]},
2660+
crs="EPSG:4326",
2661+
)
2662+
output_path = tmp_path / "test.parquet"
2663+
write_dataframe(gdf, output_path, use_arrow=use_arrow)
2664+
result = read_dataframe(output_path, use_arrow=use_arrow)
2665+
assert_geodataframe_equal(result, gdf)

0 commit comments

Comments
 (0)