Skip to content

Commit 7aed64d

Browse files
authored
ENH: add support to read and write .gpkg.zip and .shp.zip (#527)
1 parent e32f43c commit 7aed64d

File tree

8 files changed

+71
-5
lines changed

8 files changed

+71
-5
lines changed

CHANGES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
### Improvements
66

77
- Capture all errors logged by GDAL when opening a file fails (#495).
8+
- Add support to read and write ".gpkg.zip" (GDAL >= 3.7), ".shp.zip", and ".shz"
9+
files (#527).
810

911
### Bug fixes
1012

pyogrio/_compat.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
PANDAS_GE_22 = pandas is not None and Version(pandas.__version__) >= Version("2.2.0")
4242

4343
GDAL_GE_352 = __gdal_version__ >= (3, 5, 2)
44+
GDAL_GE_37 = __gdal_version__ >= (3, 7, 0)
4445
GDAL_GE_38 = __gdal_version__ >= (3, 8, 0)
4546

4647
HAS_GDAL_GEOS = __gdal_geos_version__ is not None

pyogrio/tests/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@
3131
".geojsonl": "GeoJSONSeq",
3232
".geojsons": "GeoJSONSeq",
3333
".gpkg": "GPKG",
34+
".gpkg.zip": "GPKG",
3435
".shp": "ESRI Shapefile",
36+
".shp.zip": "ESRI Shapefile",
37+
".shz": "ESRI Shapefile",
3538
}
3639

3740
# mapping of driver name to extension

pyogrio/tests/test_arrow.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,9 @@ def test_write_append(request, tmp_path, naturalearth_lowres, ext):
643643
pytest.mark.xfail(reason="Bugs with append when writing Arrow to GeoJSON")
644644
)
645645

646+
if ext == ".gpkg.zip":
647+
pytest.skip("Append is not supported for .gpkg.zip")
648+
646649
meta, table = read_arrow(naturalearth_lowres)
647650

648651
# coerce output layer to generic Geometry to avoid mixed type errors

pyogrio/tests/test_geopandas_io.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,13 @@
1616
vsi_listtree,
1717
vsi_unlink,
1818
)
19-
from pyogrio._compat import GDAL_GE_352, HAS_ARROW_WRITE_API, HAS_PYPROJ, PANDAS_GE_15
19+
from pyogrio._compat import (
20+
GDAL_GE_37,
21+
GDAL_GE_352,
22+
HAS_ARROW_WRITE_API,
23+
HAS_PYPROJ,
24+
PANDAS_GE_15,
25+
)
2026
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError, GeometryError
2127
from pyogrio.geopandas import PANDAS_GE_20, read_dataframe, write_dataframe
2228
from pyogrio.raw import (
@@ -1580,6 +1586,30 @@ def test_custom_crs_io(tmp_path, naturalearth_lowres_all_ext, use_arrow):
15801586
assert df.crs.equals(expected.crs)
15811587

15821588

1589+
@pytest.mark.parametrize("ext", [".gpkg.zip", ".shp.zip", ".shz"])
1590+
@pytest.mark.requires_arrow_write_api
1591+
def test_write_read_zipped_ext(tmp_path, naturalearth_lowres, ext, use_arrow):
1592+
"""Run a basic read and write test on some extra (zipped) extensions."""
1593+
if ext == ".gpkg.zip" and not GDAL_GE_37:
1594+
pytest.skip(".gpkg.zip support requires GDAL >= 3.7")
1595+
1596+
input_gdf = read_dataframe(naturalearth_lowres)
1597+
output_path = tmp_path / f"test{ext}"
1598+
1599+
write_dataframe(input_gdf, output_path, use_arrow=use_arrow)
1600+
1601+
assert output_path.exists()
1602+
result_gdf = read_dataframe(output_path)
1603+
1604+
geometry_types = result_gdf.geometry.type.unique()
1605+
if DRIVERS[ext] in DRIVERS_NO_MIXED_SINGLE_MULTI:
1606+
assert list(geometry_types) == ["MultiPolygon"]
1607+
else:
1608+
assert set(geometry_types) == {"MultiPolygon", "Polygon"}
1609+
1610+
assert_geodataframe_equal(result_gdf, input_gdf, check_index_type=False)
1611+
1612+
15831613
def test_write_read_mixed_column_values(tmp_path):
15841614
# use_arrow=True is tested separately below
15851615
mixed_values = ["test", 1.0, 1, datetime.now(), None, np.nan]

pyogrio/tests/test_path.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,20 @@ def change_cwd(path):
3333
[
3434
# local file paths that should be passed through as is
3535
("data.gpkg", "data.gpkg"),
36+
("data.gpkg.zip", "data.gpkg.zip"),
37+
("data.shp.zip", "data.shp.zip"),
3638
(Path("data.gpkg"), "data.gpkg"),
39+
(Path("data.gpkg.zip"), "data.gpkg.zip"),
40+
(Path("data.shp.zip"), "data.shp.zip"),
3741
("/home/user/data.gpkg", "/home/user/data.gpkg"),
42+
("/home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
43+
("/home/user/data.shp.zip", "/home/user/data.shp.zip"),
3844
(r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
45+
(r"C:\User\Documents\data.gpkg.zip", r"C:\User\Documents\data.gpkg.zip"),
46+
(r"C:\User\Documents\data.shp.zip", r"C:\User\Documents\data.shp.zip"),
3947
("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
48+
("file:///home/user/data.gpkg.zip", "/home/user/data.gpkg.zip"),
49+
("file:///home/user/data.shp.zip", "/home/user/data.shp.zip"),
4050
("/home/folder # with hash/data.gpkg", "/home/folder # with hash/data.gpkg"),
4151
# cloud URIs
4252
("https://testing/data.gpkg", "/vsicurl/https://testing/data.gpkg"),

pyogrio/tests/test_raw_io.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
read_info,
1818
set_gdal_config_options,
1919
)
20-
from pyogrio._compat import HAS_PYARROW, HAS_SHAPELY
20+
from pyogrio._compat import GDAL_GE_37, HAS_PYARROW, HAS_SHAPELY
2121
from pyogrio.errors import DataLayerError, DataSourceError, FeatureError
2222
from pyogrio.raw import open_arrow, read, write
2323
from pyogrio.tests.conftest import (
@@ -63,9 +63,10 @@ def test_read(naturalearth_lowres):
6363
@pytest.mark.parametrize("ext", DRIVERS)
6464
def test_read_autodetect_driver(tmp_path, naturalearth_lowres, ext):
6565
# Test all supported autodetect drivers
66+
if ext == ".gpkg.zip" and not GDAL_GE_37:
67+
pytest.skip(".gpkg.zip not supported for gdal < 3.7.0")
6668
testfile = prepare_testfile(naturalearth_lowres, dst_dir=tmp_path, ext=ext)
6769

68-
assert testfile.suffix == ext
6970
assert testfile.exists()
7071
meta, _, geometry, fields = read(testfile)
7172

@@ -703,6 +704,9 @@ def test_write_append(tmp_path, naturalearth_lowres, ext):
703704
if ext in (".geojsonl", ".geojsons") and __gdal_version__ < (3, 6, 0):
704705
pytest.skip("Append to GeoJSONSeq only available for GDAL >= 3.6.0")
705706

707+
if ext == ".gpkg.zip":
708+
pytest.skip("Append to .gpkg.zip is not supported")
709+
706710
meta, _, geometry, field_data = read(naturalearth_lowres)
707711

708712
# coerce output layer to MultiPolygon to avoid mixed type errors

pyogrio/util.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
from pyogrio._vsi import vsimem_rmtree_toplevel as _vsimem_rmtree_toplevel
1111

12+
MULTI_EXTENSIONS = (".gpkg.zip", ".shp.zip")
13+
1214

1315
def get_vsi_path_or_buffer(path_or_buffer):
1416
"""Get VSI-prefixed path or bytes buffer depending on type of path_or_buffer.
@@ -68,15 +70,23 @@ def vsi_path(path: Union[str, Path]) -> str:
6870
# Windows drive letters (e.g. "C:\") confuse `urlparse` as they look like
6971
# URL schemes
7072
if sys.platform == "win32" and re.match("^[a-zA-Z]\\:", path):
73+
# If it is not a zip file, or it is a multi-extension zip file that is directly
74+
# supported by a GDAL driver, return the path as is.
7175
if not path.split("!")[0].endswith(".zip"):
7276
return path
77+
if path.split("!")[0].endswith(MULTI_EXTENSIONS):
78+
return path
7379

7480
# prefix then allow to proceed with remaining parsing
7581
path = f"zip://{path}"
7682

7783
path, archive, scheme = _parse_uri(path)
7884

79-
if scheme or archive or path.endswith(".zip"):
85+
if (
86+
scheme
87+
or archive
88+
or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
89+
):
8090
return _construct_vsi_path(path, archive, scheme)
8191

8292
return path
@@ -146,7 +156,10 @@ def _construct_vsi_path(path, archive, scheme) -> str:
146156
suffix = ""
147157
schemes = scheme.split("+")
148158

149-
if "zip" not in schemes and (archive.endswith(".zip") or path.endswith(".zip")):
159+
if "zip" not in schemes and (
160+
archive.endswith(".zip")
161+
or (path.endswith(".zip") and not path.endswith(MULTI_EXTENSIONS))
162+
):
150163
schemes.insert(0, "zip")
151164

152165
if schemes:

0 commit comments

Comments
 (0)