|
15 | 15 | # specific language governing permissions and limitations |
16 | 16 | # under the License. |
17 | 17 |
|
18 | | -import pytest |
| 18 | +import json |
19 | 19 | import tempfile |
20 | | -import shapely |
| 20 | +from pathlib import Path |
| 21 | + |
21 | 22 | import geopandas |
22 | 23 | import geopandas.testing |
| 24 | +import pytest |
| 25 | +import shapely |
23 | 26 | from pyarrow import parquet |
24 | | -from pathlib import Path |
25 | | -from sedonadb.testing import geom_or_null, SedonaDB, DuckDB, skip_if_not_exists |
| 27 | +from sedonadb._lib import SedonaError |
| 28 | +from sedonadb.testing import DuckDB, SedonaDB, geom_or_null, skip_if_not_exists |
26 | 29 |
|
27 | 30 |
|
28 | 31 | @pytest.mark.parametrize("name", ["water-junc", "water-point"]) |
@@ -257,6 +260,68 @@ def test_write_geoparquet_geometry(con, geoarrow_data, name): |
257 | 260 | geopandas.testing.assert_geodataframe_equal(gdf_roundtrip, gdf) |
258 | 261 |
|
259 | 262 |
|
def test_write_geoparquet_1_1(con, geoarrow_data):
    """Check GeoParquet 1.1 output specifically.

    Writes the ns-water "water-junc" GeoParquet file with
    ``geoparquet_version="1.1"`` and verifies the "geo" file metadata
    (version and bbox covering), the roundtrip through GeoPandas, and the
    behavior when a file that already contains a ``bbox`` column is rewritten
    with and without ``overwrite_bbox_columns=True``.
    """
    path = geoarrow_data / "ns-water" / "files" / "ns-water_water-junc_geo.parquet"
    skip_if_not_exists(path)

    # Sort and reset the index so the frame compares equal to the file that is
    # written below with sort_by="OBJECTID"
    gdf = geopandas.read_parquet(path).sort_values(by="OBJECTID").reset_index(drop=True)

    with tempfile.TemporaryDirectory() as td:
        tmp_parquet = Path(td) / "tmp.parquet"
        con.create_data_frame(gdf).to_parquet(
            tmp_parquet, sort_by="OBJECTID", geoparquet_version="1.1"
        )

        # The "geo" key/value metadata should declare version 1.1.0 and a bbox
        # covering for the geometry column
        file_kv_metadata = parquet.ParquetFile(tmp_parquet).metadata.metadata
        assert b"geo" in file_kv_metadata
        geo_metadata = json.loads(file_kv_metadata[b"geo"])
        assert geo_metadata["version"] == "1.1.0"
        geo_column = geo_metadata["columns"]["geometry"]
        assert geo_column["covering"] == {
            "bbox": {
                "xmin": ["bbox", "xmin"],
                "ymin": ["bbox", "ymin"],
                "xmax": ["bbox", "xmax"],
                "ymax": ["bbox", "ymax"],
            }
        }

        # This should still roundtrip through GeoPandas because GeoPandas removes
        # the bbox column on read
        gdf_roundtrip = geopandas.read_parquet(tmp_parquet)
        # Compare as plain lists: an elementwise Index comparison raises
        # ValueError when the column counts differ instead of failing the
        # assertion with a readable diff
        assert list(gdf.columns) == list(gdf_roundtrip.columns)
        geopandas.testing.assert_geodataframe_equal(gdf_roundtrip, gdf)

        # ...but the bbox column should still be there
        df_roundtrip = con.read_parquet(tmp_parquet).to_pandas()
        assert "bbox" in df_roundtrip.columns

        # An attempt to rewrite this should fail because it would have to overwrite
        # the bbox column
        tmp_parquet2 = Path(td) / "tmp2.parquet"
        with pytest.raises(
            SedonaError, match="Can't overwrite GeoParquet 1.1 bbox column 'bbox'"
        ):
            con.read_parquet(tmp_parquet).to_parquet(
                tmp_parquet2, geoparquet_version="1.1"
            )

        # ...unless we pass the appropriate option
        con.read_parquet(tmp_parquet).to_parquet(
            tmp_parquet2, geoparquet_version="1.1", overwrite_bbox_columns=True
        )
        df_roundtrip = con.read_parquet(tmp_parquet2).to_pandas()
        assert "bbox" in df_roundtrip.columns
| 317 | + |
def test_write_geoparquet_unknown(con):
    """Check that an unrecognized geoparquet_version string is rejected."""
    bad_version = "not supported"
    expected_message = "Unexpected GeoParquet version string"

    with pytest.raises(SedonaError, match=expected_message):
        frame = con.sql("SELECT 1 as one")
        frame.to_parquet("unused", geoparquet_version=bad_version)
| 323 | + |
| 324 | + |
260 | 325 | def test_write_geoparquet_geography(con, geoarrow_data): |
261 | 326 | # Checks a read and write of geography (roundtrip, since nobody else can read/write) |
262 | 327 | path = ( |
|
0 commit comments