|
51 | 51 | import geopandas as gp |
52 | 52 | import pandas as pd |
53 | 53 | from geopandas.array import from_wkt |
54 | | - from pandas.api.types import is_datetime64_dtype, is_object_dtype, is_string_dtype |
| 54 | + from pandas.api.types import ( |
| 55 | + is_bool_dtype, |
| 56 | + is_datetime64_dtype, |
| 57 | + is_float_dtype, |
| 58 | + is_integer_dtype, |
| 59 | + is_object_dtype, |
| 60 | + is_string_dtype, |
| 61 | + ) |
55 | 62 |
|
56 | 63 | import shapely # if geopandas is present, shapely is expected to be present |
57 | 64 | from shapely.geometry import Point |
@@ -515,6 +522,93 @@ def test_read_list_nested_struct_parquet_file( |
515 | 522 | assert result["col_struct"][2] == {"a": 1, "b": 2} |
516 | 523 |
|
517 | 524 |
|
@pytest.mark.requires_arrow_write_api
def test_roundtrip_many_data_types_geojson_file(
    request, tmp_path, many_data_types_geojson_file, use_arrow
):
    """Read a GeoJSON file with many data types, write it back and re-read it.

    The content is checked right after the initial read and once more after
    the write/read roundtrip.
    """
    # Single-value columns: (column name, dtype predicate, expected values).
    scalar_expectations = (
        ("int_col", is_integer_dtype, [1]),
        ("float_col", is_float_dtype, [1.5]),
        ("str_col", is_string_dtype, ["string"]),
        ("bool_col", is_bool_dtype, [True]),
    )
    # Homogeneous list columns: (column name, expected items of the first row).
    list_expectations = (
        ("list_int_col", [1, 2, 3]),
        ("list_str_col", ["a", "b", "c"]),
    )

    def check_content(frame: pd.DataFrame, with_arrow: bool, skip_mixed_list=False):
        """Assert that ``frame`` holds the data of many_data_types_geojson_file.

        The expectations for ``date_col`` differ slightly depending on
        ``with_arrow``. With ``skip_mixed_list=True`` the ``list_mixed_col``
        column is not checked at all.
        """
        for name, has_expected_dtype, expected in scalar_expectations:
            assert name in frame.columns
            column = frame[name]
            assert has_expected_dtype(column.dtype)
            assert column.to_list() == expected

        first_of_2020 = pd.Timestamp("2020-01-01")
        assert "date_col" in frame.columns
        date_column = frame["date_col"]
        if with_arrow:
            # With Arrow, dates come back as datetime.date objects.
            assert is_object_dtype(date_column.dtype)
            assert date_column.to_list() == [first_of_2020.date()]
        else:
            # Without Arrow, date columns come back as datetime64.
            assert is_datetime64_dtype(date_column.dtype)
            assert date_column.to_list() == [first_of_2020]

        # "time_col" is deliberately not checked until
        # https://github.com/geopandas/pyogrio/issues/615 is solved.
        # Intended checks once it is:
        # assert "time_col" in frame.columns
        # assert is_object_dtype(frame["time_col"].dtype)
        # assert frame["time_col"].to_list() == [time(12, 0, 0)]

        assert "datetime_col" in frame.columns
        datetime_column = frame["datetime_col"]
        assert is_datetime64_dtype(datetime_column.dtype)
        assert datetime_column.to_list() == [pd.Timestamp("2020-01-01T12:00:00")]

        for name, expected_items in list_expectations:
            assert name in frame.columns
            assert is_object_dtype(frame[name].dtype)
            assert frame[name][0].tolist() == expected_items

        if skip_mixed_list:
            return

        assert "list_mixed_col" in frame.columns
        assert is_object_dtype(frame["list_mixed_col"].dtype)
        assert frame["list_mixed_col"][0] == [1, "a", None, True]

    # Step 1: read the source file and validate what was read.
    gdf = read_dataframe(many_data_types_geojson_file, use_arrow=use_arrow)
    check_content(gdf, use_arrow)

    # Step 2: prepare for writing the data that was just read.
    if not use_arrow:
        skip_mixed_list = False
        request.node.add_marker(
            pytest.mark.xfail(
                reason="roundtripping list types fails with use_arrow=False"
            )
        )
    else:
        # Writing a column with mixed types in a list is not supported with Arrow.
        skip_mixed_list = True
        gdf = gdf.drop(columns=["list_mixed_col"])

    # Step 3: write, read back, and validate again.
    roundtrip_path = tmp_path / "temp.geojson"
    write_dataframe(gdf, roundtrip_path, use_arrow=use_arrow)

    roundtripped = read_dataframe(roundtrip_path, use_arrow=use_arrow)
    check_content(roundtripped, use_arrow, skip_mixed_list=skip_mixed_list)
| 610 | + |
| 611 | + |
518 | 612 | @pytest.mark.filterwarnings( |
519 | 613 | "ignore: Non-conformant content for record 1 in column dates" |
520 | 614 | ) |
|
0 commit comments