Skip to content

Commit d5983c5

Browse files
authored
BUG: fix regressions on date-typed columns (#616)
1 parent 2a022c3 commit d5983c5

File tree

5 files changed

+144
-3
lines changed

5 files changed

+144
-3
lines changed

CHANGES.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# CHANGELOG
22

3+
## 0.12.1 (????-??-??)
4+
5+
### Bug fixes
6+
7+
- Fix regression in reading date columns (#616)
8+
39
## 0.12.0 (2025-11-26)
410

511
### Potentially breaking changes

pyogrio/_io.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,7 @@ cdef process_fields(
978978

979979
elif field_type == OFTDateTime or field_type == OFTDate:
980980

981-
if datetime_as_string:
981+
if field_type == OFTDateTime and datetime_as_string:
982982
# defer datetime parsing to the user/pandas layer
983983
IF CTE_GDAL_VERSION >= (3, 7, 0):
984984
data[i] = get_string(

pyogrio/geopandas.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,12 @@ def read_dataframe(
440440

441441
# convert datetime columns that were read as string to datetime
442442
for dtype, column in zip(meta["dtypes"], meta["fields"]):
443-
if dtype is not None and dtype.startswith("datetime"):
443+
# With arrow, date columns are returned as datetime.date objects, so skip datetime parsing for them.
444+
if (
445+
dtype is not None
446+
and dtype.startswith("datetime")
447+
and dtype != "datetime64[D]"
448+
):
444449
df[column] = _try_parse_datetime(
445450
df[column], datetime_as_string, mixed_offsets_as_utc
446451
)

pyogrio/tests/conftest.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,42 @@ def list_field_values_files(tmp_path, request):
336336
return list_field_values_parquet_file()
337337

338338

339+
@pytest.fixture(scope="function")
340+
def many_data_types_geojson_file(tmp_path):
341+
# create GeoJSON file with properties of many data types
342+
many_types_geojson = """{
343+
"type": "FeatureCollection",
344+
"features": [
345+
{
346+
"type": "Feature",
347+
"geometry": {
348+
"type": "Point",
349+
"coordinates": [0, 0]
350+
},
351+
"properties": {
352+
"int_col": 1,
353+
"float_col": 1.5,
354+
"str_col": "string",
355+
"bool_col": true,
356+
"null_col": null,
357+
"date_col": "2020-01-01",
358+
"time_col": "12:00:00",
359+
"datetime_col": "2020-01-01T12:00:00",
360+
"list_int_col": [1, 2, 3],
361+
"list_str_col": ["a", "b", "c"],
362+
"list_mixed_col": [1, "a", null, true]
363+
}
364+
}
365+
]
366+
}"""
367+
368+
filename = tmp_path / "test_many_data_types.geojson"
369+
with open(filename, "w") as f:
370+
_ = f.write(many_types_geojson)
371+
372+
return filename
373+
374+
339375
@pytest.fixture(scope="function")
340376
def nested_geojson_file(tmp_path):
341377
# create GeoJSON file with nested properties

pyogrio/tests/test_geopandas_io.py

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,14 @@
5151
import geopandas as gp
5252
import pandas as pd
5353
from geopandas.array import from_wkt
54-
from pandas.api.types import is_datetime64_dtype, is_object_dtype, is_string_dtype
54+
from pandas.api.types import (
55+
is_bool_dtype,
56+
is_datetime64_dtype,
57+
is_float_dtype,
58+
is_integer_dtype,
59+
is_object_dtype,
60+
is_string_dtype,
61+
)
5562

5663
import shapely # if geopandas is present, shapely is expected to be present
5764
from shapely.geometry import Point
@@ -515,6 +522,93 @@ def test_read_list_nested_struct_parquet_file(
515522
assert result["col_struct"][2] == {"a": 1, "b": 2}
516523

517524

525+
@pytest.mark.requires_arrow_write_api
526+
def test_roundtrip_many_data_types_geojson_file(
527+
request, tmp_path, many_data_types_geojson_file, use_arrow
528+
):
529+
"""Test roundtripping a GeoJSON file containing many data types."""
530+
531+
def validate_result(df: pd.DataFrame, use_arrow: bool, ignore_mixed_list_col=False):
532+
"""Function to validate the data of many_data_types_geojson_file.
533+
534+
The expected results differ slightly depending on whether arrow is used.
535+
"""
536+
assert "int_col" in df.columns
537+
assert is_integer_dtype(df["int_col"].dtype)
538+
assert df["int_col"].to_list() == [1]
539+
540+
assert "float_col" in df.columns
541+
assert is_float_dtype(df["float_col"].dtype)
542+
assert df["float_col"].to_list() == [1.5]
543+
544+
assert "str_col" in df.columns
545+
assert is_string_dtype(df["str_col"].dtype)
546+
assert df["str_col"].to_list() == ["string"]
547+
548+
assert "bool_col" in df.columns
549+
assert is_bool_dtype(df["bool_col"].dtype)
550+
assert df["bool_col"].to_list() == [True]
551+
552+
assert "date_col" in df.columns
553+
if use_arrow:
554+
# Arrow returns dates as datetime.date objects.
555+
assert is_object_dtype(df["date_col"].dtype)
556+
assert df["date_col"].to_list() == [pd.Timestamp("2020-01-01").date()]
557+
else:
558+
# Without arrow, date columns are returned as datetime64.
559+
assert is_datetime64_dtype(df["date_col"].dtype)
560+
assert df["date_col"].to_list() == [pd.Timestamp("2020-01-01")]
561+
562+
# Ignore time columns until the following issue is resolved:
563+
# Reported in https://github.com/geopandas/pyogrio/issues/615
564+
# assert "time_col" in df.columns
565+
# assert is_object_dtype(df["time_col"].dtype)
566+
# assert df["time_col"].to_list() == [time(12, 0, 0)]
567+
568+
assert "datetime_col" in df.columns
569+
assert is_datetime64_dtype(df["datetime_col"].dtype)
570+
assert df["datetime_col"].to_list() == [pd.Timestamp("2020-01-01T12:00:00")]
571+
572+
assert "list_int_col" in df.columns
573+
assert is_object_dtype(df["list_int_col"].dtype)
574+
assert df["list_int_col"][0].tolist() == [1, 2, 3]
575+
576+
assert "list_str_col" in df.columns
577+
assert is_object_dtype(df["list_str_col"].dtype)
578+
assert df["list_str_col"][0].tolist() == ["a", "b", "c"]
579+
580+
if not ignore_mixed_list_col:
581+
assert "list_mixed_col" in df.columns
582+
assert is_object_dtype(df["list_mixed_col"].dtype)
583+
assert df["list_mixed_col"][0] == [1, "a", None, True]
584+
585+
# Read and validate result of reading
586+
read_gdf = read_dataframe(many_data_types_geojson_file, use_arrow=use_arrow)
587+
validate_result(read_gdf, use_arrow)
588+
589+
# Write the data read, read it back, and validate again
590+
if use_arrow:
591+
# Writing a column with mixed types in a list is not supported with Arrow.
592+
ignore_mixed_list_col = True
593+
read_gdf = read_gdf.drop(columns=["list_mixed_col"])
594+
else:
595+
ignore_mixed_list_col = False
596+
request.node.add_marker(
597+
pytest.mark.xfail(
598+
reason="roundtripping list types fails with use_arrow=False"
599+
)
600+
)
601+
602+
tmp_file = tmp_path / "temp.geojson"
603+
write_dataframe(read_gdf, tmp_file, use_arrow=use_arrow)
604+
605+
# Validate data written
606+
read_back_gdf = read_dataframe(tmp_file, use_arrow=use_arrow)
607+
validate_result(
608+
read_back_gdf, use_arrow, ignore_mixed_list_col=ignore_mixed_list_col
609+
)
610+
611+
518612
@pytest.mark.filterwarnings(
519613
"ignore: Non-conformant content for record 1 in column dates"
520614
)

0 commit comments

Comments
 (0)