diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
index 58529c5597b6e..b4c3e6ea754aa 100644
--- a/pandas/core/indexes/datetimelike.py
+++ b/pandas/core/indexes/datetimelike.py
@@ -717,6 +717,38 @@ def _union(self, other, sort):
         assert isinstance(other, type(self))
         assert self.dtype == other.dtype
 
+        # For tz-aware DatetimeIndex, perform union in UTC to avoid
+        # local-time irregularities across DST transitions, then convert back.
+        tz = getattr(self.dtype, "tz", None)
+        if tz is not None:
+            other_tz = getattr(other.dtype, "tz", None)
+            if (
+                other_tz == tz
+                and isinstance(self._data, DatetimeArray)
+                and isinstance(other._data, DatetimeArray)
+            ):
+                left_utc_naive = self._data.tz_convert("UTC").tz_localize(None)
+                right_utc_naive = other._data.tz_convert("UTC").tz_localize(None)
+                left_naive = type(self)._simple_new(left_utc_naive, name=self.name)
+                right_naive = type(other)._simple_new(right_utc_naive, name=other.name)
+                # NOTE(review): super(type(x), x) starts the MRO lookup after the
+                # runtime class, so this likely re-enters this same _union with
+                # tz-naive operands - confirm that is intended.
+                res_naive = super(type(left_naive), left_naive)._union(
+                    right_naive, sort
+                )
+
+                if isinstance(res_naive, DatetimeArray):
+                    base_arr = res_naive
+                    name = self.name
+                else:
+                    base_arr = cast(DatetimeArray, res_naive._data)
+                    name = res_naive.name
+
+                res_arr = base_arr.tz_localize("UTC").tz_convert(tz)
+                res = type(self)._simple_new(res_arr, name=name)
+                return res._with_freq("infer")
+
         if self._can_range_setop(other):
             return self._range_union(other, sort=sort)
 
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 7a68cb867c94e..25e4250866e91 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -60,6 +60,30 @@ def test_union3(self, sort, box):
         result = first.union(case, sort=sort)
         tm.assert_index_equal(result, expected)
 
+    def test_union_across_dst_boundary(self):
+        # US/Eastern DST spring-forward on 2021-03-14 at 02:00
+        # (02:00-02:59 local time does not exist)
+        tz = "US/Eastern"
+        # left side crosses the missing hour: 00:00, 01:00, 03:00
+        left = date_range("2021-03-14 00:00", periods=3, freq="h", tz=tz)
+        # right side starts at the first valid post-DST hour, overlapping
+        # the last element of left
+        right = date_range("2021-03-14 03:00", periods=3, freq="h", tz=tz)
+
+        # Expect a union that preserves tz and includes valid hours without duplicates
+        expected = DatetimeIndex(
+            [
+                Timestamp("2021-03-14 00:00", tz=tz),
+                Timestamp("2021-03-14 01:00", tz=tz),
+                Timestamp("2021-03-14 03:00", tz=tz),
+                Timestamp("2021-03-14 04:00", tz=tz),
+                Timestamp("2021-03-14 05:00", tz=tz),
+            ]
+        ).as_unit(left.unit)
+
+        result = left.union(right)
+        tm.assert_index_equal(result, expected)
+
     @pytest.mark.parametrize("tz", tz)
     def test_union(self, tz, sort):
         rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)