Skip to content

Commit 73d5af2

Browse files
authored
ENH: add automatic detection of 2.5D in write_dataframe (#223)
1 parent 31d7571 commit 73d5af2

File tree

4 files changed

+109
-6
lines changed

4 files changed

+109
-6
lines changed

CHANGES.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
### Improvements
66

7-
- ENH: add "driver" property to read_info result (#224)
7+
- Add automatic detection of 2.5D geometries in write_dataframe (#223)
8+
- Add "driver" property to read_info result (#224)
89

910
## 0.5.1 (2023-01-26)
1011

pyogrio/geopandas.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI
1+
import numpy as np
2+
from pyogrio.raw import DRIVERS_NO_MIXED_SINGLE_MULTI, DRIVERS_NO_MIXED_DIMENSIONS
23
from pyogrio.raw import detect_driver, read, read_arrow, write
4+
from pyogrio.errors import DataSourceError
35

46

57
def _stringify_path(path):
@@ -309,9 +311,22 @@ def write_dataframe(
309311
# Determine geometry_type and/or promote_to_multi
310312
if geometry_type is None or promote_to_multi is None:
311313
tmp_geometry_type = "Unknown"
314+
has_z = False
312315

313316
# If there is data, infer layer geometry type + promote_to_multi
314317
if not df.empty:
318+
# None/Empty geometries sometimes report as Z incorrectly, so ignore them
319+
has_z_arr = geometry[
320+
(geometry != np.array(None)) & (~geometry.is_empty)
321+
].has_z
322+
has_z = has_z_arr.any()
323+
all_z = has_z_arr.all()
324+
325+
if driver in DRIVERS_NO_MIXED_DIMENSIONS and has_z and not all_z:
326+
raise DataSourceError(
327+
f"Mixed 2D and 3D coordinates are not supported by {driver}"
328+
)
329+
315330
geometry_types = pd.Series(geometry.type.unique()).dropna().values
316331
if len(geometry_types) == 1:
317332
tmp_geometry_type = geometry_types[0]
@@ -347,6 +362,8 @@ def write_dataframe(
347362

348363
if geometry_type is None:
349364
geometry_type = tmp_geometry_type
365+
if has_z:
366+
geometry_type = f"2.5D {geometry_type}"
350367

351368
crs = None
352369
if geometry.crs:

pyogrio/raw.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
"GPKG",
3333
}
3434

35+
DRIVERS_NO_MIXED_DIMENSIONS = {
36+
"FlatGeobuf",
37+
}
38+
3539

3640
def read(
3741
path_or_buffer,

pyogrio/tests/test_geopandas_io.py

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@
77
from pyogrio import list_layers, read_info, __gdal_version__, __gdal_geos_version__
88
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
99
from pyogrio.geopandas import read_dataframe, write_dataframe
10-
from pyogrio.raw import DRIVERS, DRIVERS_NO_MIXED_SINGLE_MULTI
10+
from pyogrio.raw import (
11+
DRIVERS,
12+
DRIVERS_NO_MIXED_DIMENSIONS,
13+
DRIVERS_NO_MIXED_SINGLE_MULTI,
14+
)
1115
from pyogrio.tests.conftest import ALL_EXTS
1216

1317
try:
1418
import pandas as pd
1519
from pandas.testing import assert_frame_equal, assert_index_equal
1620

1721
import geopandas as gp
22+
from geopandas.array import from_wkt
1823
from geopandas.testing import assert_geodataframe_equal
1924

2025
from shapely.geometry import Point
@@ -843,17 +848,93 @@ def test_write_read_null(tmp_path):
843848
],
844849
)
845850
def test_write_geometry_z_types(tmp_path, wkt, geom_types):
846-
from geopandas.array import from_wkt
847-
848851
filename = tmp_path / "test.fgb"
849-
850852
gdf = gp.GeoDataFrame(geometry=from_wkt([wkt]), crs="EPSG:4326")
851853
for geom_type in geom_types:
852854
write_dataframe(gdf, filename, geometry_type=geom_type)
853855
df = read_dataframe(filename)
854856
assert_geodataframe_equal(df, gdf)
855857

856858

859+
@pytest.mark.parametrize("ext", ALL_EXTS)
860+
@pytest.mark.parametrize(
861+
"test_descr, exp_geometry_type, mixed_dimensions, wkt",
862+
[
863+
("1 Point Z", "2.5D Point", False, ["Point Z (0 0 0)"]),
864+
("1 LineString Z", "2.5D LineString", False, ["LineString Z (0 0 0, 1 1 0)"]),
865+
(
866+
"1 Polygon Z",
867+
"2.5D Polygon",
868+
False,
869+
["Polygon Z ((0 0 0, 0 1 0, 1 1 0, 0 0 0))"],
870+
),
871+
("1 MultiPoint Z", "2.5D MultiPoint", False, ["MultiPoint Z (0 0 0, 1 1 0)"]),
872+
(
873+
"1 MultiLineString Z",
874+
"2.5D MultiLineString",
875+
False,
876+
["MultiLineString Z ((0 0 0, 1 1 0), (2 2 2, 3 3 2))"],
877+
),
878+
(
879+
"1 MultiLinePolygon Z",
880+
"2.5D MultiPolygon",
881+
False,
882+
[
883+
"MultiPolygon Z (((0 0 0, 0 1 0, 1 1 0, 0 0 0)), ((1 1 1, 1 2 1, 2 2 1, 1 1 1)))" # noqa: E501
884+
],
885+
),
886+
(
887+
"1 GeometryCollection Z",
888+
"2.5D GeometryCollection",
889+
False,
890+
["GeometryCollection Z (Point Z (0 0 0))"],
891+
),
892+
("Point Z + Point", "2.5D Point", True, ["Point Z (0 0 0)", "Point (0 0)"]),
893+
("Point Z + None", "2.5D Point", False, ["Point Z (0 0 0)", None]),
894+
],
895+
)
896+
def test_write_geometry_z_types_auto(
897+
tmp_path, ext, test_descr, exp_geometry_type, mixed_dimensions, wkt
898+
):
899+
# Shapefile has some different behaviour that other file types
900+
if ext == ".shp":
901+
if exp_geometry_type == "2.5D GeometryCollection":
902+
pytest.skip(f"ext {ext} doesn't support {exp_geometry_type}")
903+
elif exp_geometry_type == "2.5D MultiLineString":
904+
exp_geometry_type = "2.5D LineString"
905+
elif exp_geometry_type == "2.5D MultiPolygon":
906+
exp_geometry_type = "2.5D Polygon"
907+
908+
column_data = {}
909+
column_data["test_descr"] = [test_descr] * len(wkt)
910+
column_data["idx"] = [str(idx) for idx in range(len(wkt))]
911+
gdf = gp.GeoDataFrame(column_data, geometry=from_wkt(wkt), crs="EPSG:4326")
912+
filename = tmp_path / f"test{ext}"
913+
914+
if mixed_dimensions and DRIVERS[ext] in DRIVERS_NO_MIXED_DIMENSIONS:
915+
with pytest.raises(
916+
DataSourceError,
917+
match=("Mixed 2D and 3D coordinates are not supported by"),
918+
):
919+
write_dataframe(gdf, filename)
920+
return
921+
else:
922+
write_dataframe(gdf, filename)
923+
924+
info = read_info(filename)
925+
assert info["geometry_type"] == exp_geometry_type
926+
927+
result_gdf = read_dataframe(filename)
928+
if ext == ".geojsonl":
929+
result_gdf.crs = "EPSG:4326"
930+
if ext == ".fgb":
931+
# When the following gdal issue is released, this if needs to be removed:
932+
# https://github.com/OSGeo/gdal/issues/7401
933+
gdf = gdf.loc[~((gdf.geometry == np.array(None)) | gdf.geometry.is_empty)]
934+
935+
assert_geodataframe_equal(gdf, result_gdf)
936+
937+
857938
def test_read_multisurface(data_dir):
858939
df = read_dataframe(data_dir / "test_multisurface.gpkg")
859940

0 commit comments

Comments
 (0)