Skip to content

Commit 355b143

Browse files
committed
BUG: Implement elementwise IntervalArray.overlaps (#62004)
1 parent 915b38f commit 355b143

File tree

5 files changed

+44
-54
lines changed

5 files changed

+44
-54
lines changed

doc/source/whatsnew/v2.3.2.rst

Lines changed: 0 additions & 35 deletions
This file was deleted.

pandas/core/arrays/interval.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1423,9 +1423,20 @@ def mid(self) -> Index:
14231423
)
14241424
def overlaps(self, other):
14251425
if isinstance(other, (IntervalArray, ABCIntervalIndex)):
1426-
raise NotImplementedError
1426+
if not isinstance(other, IntervalArray):
1427+
other = IntervalArray(other)
1428+
if len(self) != len(other):
1429+
raise ValueError("Both IntervalArrays must have the same length.")
1430+
if self.closed != other.closed:
1431+
raise ValueError(
1432+
"Both IntervalArrays must have the same 'closed' property."
1433+
)
1434+
1435+
op1 = le if (self.closed_left and other.closed_right) else lt
1436+
op2 = le if (other.closed_left and self.closed_right) else lt
1437+
return op1(self.left, other.right) & op2(other.left, self.right)
14271438
if not isinstance(other, Interval):
1428-
msg = f"`other` must be Interval-like, got {type(other).__name__}"
1439+
msg = f"`other` must be Interval-like, got {type(other).__name__}"
14291440
raise TypeError(msg)
14301441

14311442
# equality is okay if both endpoints are closed (overlap at a point)

pandas/core/arrays/string_arrow.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,6 @@ def _str_contains(
346346
):
347347
if flags:
348348
return super()._str_contains(pat, case, flags, na, regex)
349-
if isinstance(pat, re.Pattern):
350-
pat = pat.pattern
351349

352350
return ArrowStringArrayMixin._str_contains(self, pat, case, flags, na, regex)
353351

pandas/tests/arrays/interval/test_interval.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,34 @@ def test_min_max(self, left_right_dtypes, index_or_series_or_array):
229229
res = arr_na.max(**kws)
230230
assert res == MAX
231231
assert type(res) == type(MAX)
232+
233+
def test_intervalarray_overlaps_all_cases(self):
234+
# Basic self-overlap
235+
data = [(0, 1), (1, 3), (2, 4)]
236+
arr = IntervalArray.from_tuples(data)
237+
result = arr.overlaps(arr)
238+
expected = np.array([True, True, True])
239+
tm.assert_numpy_array_equal(result, expected)
240+
241+
# Overlap with different intervals
242+
arr2 = IntervalArray.from_tuples([(0.5, 1.5), (2, 2.5), (3, 5)])
243+
result2 = arr.overlaps(arr2)
244+
expected2 = np.array([True, True, True])
245+
tm.assert_numpy_array_equal(result2, expected2)
246+
247+
# Length mismatch
248+
arr_short = IntervalArray.from_tuples([(0, 1)])
249+
with pytest.raises(ValueError, match="same length"):
250+
arr.overlaps(arr_short)
251+
252+
# Closed property mismatch
253+
arr_left = IntervalArray.from_tuples(data, closed="left")
254+
arr_right = IntervalArray.from_tuples(data, closed="right")
255+
with pytest.raises(ValueError, match="same 'closed' property"):
256+
arr_left.overlaps(arr_right)
257+
258+
# Overlap with scalar interval
259+
interval = Interval(1, 2)
260+
result3 = arr.overlaps(interval)
261+
expected3 = np.array([False, True, False])
262+
tm.assert_numpy_array_equal(result3, expected3)

pandas/tests/strings/test_find_replace.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -281,21 +281,6 @@ def test_contains_nan(any_string_dtype):
281281
tm.assert_series_equal(result, expected)
282282

283283

284-
def test_str_contains_compiled_regex_arrow_dtype(any_string_dtype):
285-
# GH#61942
286-
ser = Series(["foo", "bar", "baz"], dtype=any_string_dtype)
287-
pat = re.compile("ba.")
288-
result = ser.str.contains(pat)
289-
# Determine expected dtype and values
290-
expected_dtype = {
291-
"string[pyarrow]": "bool[pyarrow]",
292-
"string": "boolean",
293-
"str": bool,
294-
}.get(any_string_dtype, object)
295-
expected = Series([False, True, True], dtype=expected_dtype)
296-
tm.assert_series_equal(result, expected)
297-
298-
299284
# --------------------------------------------------------------------------------------
300285
# str.startswith
301286
# --------------------------------------------------------------------------------------

0 commit comments

Comments
 (0)