Skip to content

Commit 066af94

Browse files
fixup pyarrow datetimetz compat and update parquet tests
1 parent 99ba44f commit 066af94

File tree

4 files changed

+34
-8
lines changed

4 files changed

+34
-8
lines changed

pandas/core/internals/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def __getattr__(name: str):
4848
stacklevel=2,
4949
)
5050
if name == "DatetimeTZBlock":
51-
from pandas.core.internals.blocks import DatetimeLikeBlock
51+
from pandas.core.internals.api import _DatetimeTZBlock as DatetimeTZBlock
5252

53-
return DatetimeLikeBlock
53+
return DatetimeTZBlock
5454
if name == "ExtensionBlock":
5555
from pandas.core.internals.blocks import ExtensionBlock
5656

pandas/core/internals/api.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
)
3030
from pandas.core.construction import extract_array
3131
from pandas.core.internals.blocks import (
32+
DatetimeLikeBlock,
3233
check_ndim,
3334
ensure_block_shape,
3435
extract_pandas_array,
@@ -74,6 +75,14 @@ def _make_block(values: ArrayLike, placement: np.ndarray) -> Block:
7475
return klass(values, ndim=2, placement=placement_obj)
7576

7677

78+
class _DatetimeTZBlock(DatetimeLikeBlock):
79+
"""Implement a datetime64 block with a tz attribute."""
80+
81+
values: DatetimeArray
82+
83+
__slots__ = ()
84+
85+
7786
def make_block(
7887
values, placement, klass=None, ndim=None, dtype: Dtype | None = None
7988
) -> Block:
@@ -114,6 +123,16 @@ def make_block(
114123
dtype = dtype or values.dtype
115124
klass = get_block_type(dtype)
116125

126+
elif klass is _DatetimeTZBlock and not isinstance(values.dtype, DatetimeTZDtype):
127+
# calls coming from pyarrow (< 15) end up here
128+
values = DatetimeArray._simple_new(
129+
# error: Argument "dtype" to "_simple_new" of "DatetimeArray" has
130+
# incompatible type "Union[ExtensionDtype, dtype[Any], None]";
131+
# expected "Union[dtype[datetime64], DatetimeTZDtype]"
132+
values,
133+
dtype=dtype, # type: ignore[arg-type]
134+
)
135+
117136
if not isinstance(placement, BlockPlacement):
118137
placement = BlockPlacement(placement)
119138

pandas/tests/io/test_parquet.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,14 @@ def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
728728

729729
expected = df_full.copy()
730730
expected.loc[1, "string_with_nan"] = None
731-
expected["datetime_with_nat"] = expected["datetime_with_nat"].astype("M8[ms]")
731+
if pa_version_under13p0:
732+
expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
733+
"M8[ns]"
734+
)
735+
else:
736+
expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
737+
"M8[ms]"
738+
)
732739
tm.assert_frame_equal(res, expected)
733740

734741
def test_duplicate_columns(self, pa):
@@ -972,15 +979,12 @@ def test_additional_extension_types(self, pa):
972979

973980
def test_timestamp_nanoseconds(self, pa):
974981
# with version 2.6, pyarrow defaults to writing nanosecond timestamps, so
975-
# this should work without error
976-
# Note in previous pyarrows(<7.0.0), only the pseudo-version 2.0 was available
982+
# this should work without error, even for pyarrow < 13
977983
ver = "2.6"
978984
df = pd.DataFrame({"a": pd.date_range("2017-01-01", freq="1ns", periods=10)})
979985
check_round_trip(df, pa, write_kwargs={"version": ver})
980986

981987
def test_timezone_aware_index(self, pa, timezone_aware_date_list):
982-
pytest.importorskip("pyarrow", "11.0.0")
983-
984988
idx = 5 * [timezone_aware_date_list]
985989
df = pd.DataFrame(index=idx, data={"index_as_col": idx})
986990

@@ -995,6 +999,8 @@ def test_timezone_aware_index(self, pa, timezone_aware_date_list):
995999
# this use-case sets the resolution to 1 minute
9961000

9971001
expected = df[:]
1002+
if pa_version_under13p0:
1003+
expected.index = expected.index.as_unit("ns")
9981004
if timezone_aware_date_list.tzinfo != datetime.timezone.utc:
9991005
# pyarrow returns pytz.FixedOffset while pandas constructs datetime.timezone
10001006
# https://github.com/pandas-dev/pandas/issues/37286
@@ -1178,7 +1184,7 @@ def test_infer_string_large_string_type(self, tmp_path, pa):
11781184

11791185
def test_non_nanosecond_timestamps(self, temp_file):
11801186
# GH#49236
1181-
pa = pytest.importorskip("pyarrow", "11.0.0")
1187+
pa = pytest.importorskip("pyarrow", "13.0.0")
11821188
pq = pytest.importorskip("pyarrow.parquet")
11831189

11841190
arr = pa.array([datetime.datetime(1600, 1, 1)], type=pa.timestamp("us"))

scripts/validate_unwanted_patterns.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
"_get_option",
5454
"_fill_limit_area_1d",
5555
"_make_block",
56+
"_DatetimeTZBlock",
5657
}
5758

5859

0 commit comments

Comments
 (0)