Skip to content

Commit 714d8a7

Browse files
committed
BUG: fix DatetimeIndex union across DST without disabling range fast path
1 parent: 1b25d69 · commit: 714d8a7

File tree

1 file changed, with 30 additions and 41 deletions.

pandas/core/indexes/datetimelike.py

Lines changed: 30 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -541,14 +541,7 @@ def _as_range_index(self) -> RangeIndex:
541541
return RangeIndex(rng)
542542

543543
def _can_range_setop(self, other) -> bool:
544-
# Only allow range-based setops when both objects are tick-based AND
545-
# not timezone-aware. For tz-aware DatetimeIndex, constant i8 stepping
546-
# does not hold across DST transitions in local time, so avoid range path.
547-
if not (isinstance(self.freq, Tick) and isinstance(other.freq, Tick)):
548-
return False
549-
self_tz = getattr(self.dtype, "tz", None)
550-
other_tz = getattr(other.dtype, "tz", None)
551-
return self_tz is None and other_tz is None
544+
return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
552545

553546
def _wrap_range_setop(self, other, res_i8) -> Self:
554547
new_freq = None
@@ -724,6 +717,35 @@ def _union(self, other, sort):
724717
assert isinstance(other, type(self))
725718
assert self.dtype == other.dtype
726719

720+
# For tz-aware DatetimeIndex, perform union in UTC to avoid
721+
# local-time irregularities across DST transitions, then convert back.
722+
tz = getattr(self.dtype, "tz", None)
723+
if tz is not None:
724+
other_tz = getattr(other.dtype, "tz", None)
725+
if (
726+
other_tz == tz
727+
and isinstance(self._data, DatetimeArray)
728+
and isinstance(other._data, DatetimeArray)
729+
):
730+
left_utc_naive = self._data.tz_convert("UTC").tz_localize(None)
731+
right_utc_naive = other._data.tz_convert("UTC").tz_localize(None)
732+
left_naive = type(self)._simple_new(left_utc_naive, name=self.name)
733+
right_naive = type(other)._simple_new(right_utc_naive, name=other.name)
734+
res_naive = super(type(left_naive), left_naive)._union(
735+
right_naive, sort
736+
)
737+
738+
if isinstance(res_naive, DatetimeArray):
739+
base_arr = res_naive
740+
name = self.name
741+
else:
742+
base_arr = cast(DatetimeArray, res_naive._data)
743+
name = res_naive.name
744+
745+
res_arr = base_arr.tz_localize("UTC").tz_convert(tz)
746+
res = type(self)._simple_new(res_arr, name=name)
747+
return res._with_freq("infer")
748+
727749
if self._can_range_setop(other):
728750
return self._range_union(other, sort=sort)
729751

@@ -733,39 +755,6 @@ def _union(self, other, sort):
733755
# that result.freq == self.freq
734756
return result
735757
else:
736-
# For tz-aware DatetimeIndex, perform union in UTC to avoid
737-
# local-time irregularities across DST transitions, then convert back.
738-
tz = getattr(self.dtype, "tz", None)
739-
other_tz = getattr(other.dtype, "tz", None)
740-
if tz is not None and tz == other_tz:
741-
# Narrow to DatetimeArray to access tz_convert without mypy errors
742-
if isinstance(self._data, DatetimeArray) and isinstance(
743-
other._data, DatetimeArray
744-
):
745-
# Convert both to UTC, then drop tz to avoid re-entering
746-
# tz-aware path
747-
left_utc_naive = self._data.tz_convert("UTC").tz_localize(None)
748-
right_utc_naive = other._data.tz_convert("UTC").tz_localize(None)
749-
left_naive = type(self)._simple_new(left_utc_naive, name=self.name)
750-
right_naive = type(other)._simple_new(
751-
right_utc_naive, name=other.name
752-
)
753-
# Perform base union on tz-naive indices to avoid DST complications
754-
res_naive = super(type(left_naive), left_naive)._union(
755-
right_naive, sort
756-
)
757-
# Localize back to UTC and then convert to original tz
758-
if isinstance(res_naive, DatetimeArray):
759-
base_arr = res_naive
760-
name = self.name
761-
else:
762-
base_arr = cast(DatetimeArray, res_naive._data)
763-
name = res_naive.name
764-
res_arr = base_arr.tz_localize("UTC").tz_convert(tz)
765-
res = type(self)._simple_new(res_arr, name=name)
766-
return res._with_freq("infer")
767-
# Defensive fallback if types are unexpected
768-
return super()._union(other, sort)
769758
return super()._union(other, sort)._with_freq("infer")
770759

771760
# --------------------------------------------------------------------

0 commit comments

Comments
 (0)