Skip to content

Commit 3492609

Browse files
committed
Fix boolean logical operations that previously did not follow Kleene's three-valued logic
1 parent c2922e7 commit 3492609

File tree

3 files changed

+172
-13
lines changed

3 files changed

+172
-13
lines changed

pandas/core/arrays/arrow/array.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -920,12 +920,6 @@ def _evaluate_op_method(self, other, op, arrow_funcs) -> Self:
920920
raise NotImplementedError(f"{op.__name__} not implemented.")
921921

922922
try:
923-
if HAS_PYARROW:
924-
if op.__name__ in ARROW_BIT_WISE_FUNCS:
925-
if pa.types.is_boolean(self._pa_array.type):
926-
other = pc.fill_null(other, False)
927-
self._pa_array = pc.fill_null(self._pa_array, False)
928-
929923
result = pc_func(self._pa_array, other)
930924
except pa.ArrowNotImplementedError as err:
931925
raise TypeError(self._op_method_error_message(other_original, op)) from err

pandas/core/ops/array_ops.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,150 @@ def na_logical_op(x: np.ndarray, y, op):
389389
return result.reshape(x.shape)
390390

391391

392+
def is_nullable_bool(arr) -> bool:
    """
    Check whether every element of ``arr`` is a Python ``bool`` or missing.

    Parameters
    ----------
    arr : array-like or scalar
        Values to inspect. They are converted to a flat object array, so
        the elements of a bool-dtype ndarray are seen as Python ``bool``.

    Returns
    -------
    bool
        ``True`` if each element is ``True``, ``False`` or is reported as
        missing by ``isna``; ``True`` for empty input; ``False`` otherwise.
    """
    values = np.asarray(arr, dtype=object).ravel()
    # isna works elementwise on object arrays
    na_mask = isna(values)
    # Build the mask with an explicit bool dtype: for empty input a plain
    # ``np.array([])`` would be float64 and ``|`` with a bool mask raises.
    bool_mask = np.fromiter(
        (isinstance(x, bool) for x in values), dtype=bool, count=values.size
    )
    return bool(np.all(na_mask | bool_mask))
398+
399+
400+
def safe_is_true(arr: np.ndarray) -> np.ndarray:
    """
    Return an elementwise "is exactly ``True``" mask for an array that may
    contain missing values (e.g. ``pd.NA`` or ``np.nan``).

    Missing values are masked out before any element is inspected, so a
    value such as ``pd.NA`` is never coerced to ``bool`` (which would raise
    ``TypeError: boolean value of NA is ambiguous``).

    Parameters
    ----------
    arr : np.ndarray
        Input array, which may contain pandas missing values (``pd.NA``)
        or numpy missing values (``np.nan``). Other array-likes are
        converted with ``np.asarray``.

    Returns
    -------
    np.ndarray of bool
        Boolean array of the same shape as ``arr``.

        * ``True`` where the original value is a boolean ``True``
          (Python ``bool`` or ``np.bool_``).
        * ``False`` otherwise, including at missing value positions and
          for truthy non-boolean values such as ``1`` or ``"a"``.

    Notes
    -----
    This function works for both 1-D and n-D numpy arrays.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> arr = np.array([True, False, pd.NA, np.nan, 1], dtype=object)
    >>> safe_is_true(arr)
    array([ True, False, False, False, False])
    """
    arr = np.asarray(arr)

    # A bool-dtype array holds no missing values and is already the answer.
    if arr.dtype == bool:
        return arr.copy()

    # Identify missing values (NA, NaN, None, etc.)
    missing = isna(arr)

    # Prepare boolean output with the same shape as input
    out = np.zeros(arr.shape, dtype=bool)

    # Flatten for uniform indexing regardless of ndim. ``out`` was freshly
    # allocated and is contiguous, so ``out.ravel()`` is a view and writes
    # through it land in ``out``.
    flat_arr = arr.ravel()
    flat_out = out.ravel()
    valid = ~missing.ravel()

    # Inspect only non-missing values. Test the type explicitly rather than
    # casting: a plain cast to bool would also turn 1 or "a" into True.
    candidates = flat_arr[valid]
    flat_out[valid] = np.fromiter(
        (isinstance(v, (bool, np.bool_)) and bool(v) for v in candidates),
        dtype=bool,
        count=candidates.size,
    )

    return out
452+
453+
454+
def alignOutputWithKleene(left, right, op):
    """
    Evaluate ``&``, ``|`` or ``^`` elementwise on boolean data that may
    contain missing values.

    The rules are modelled on Kleene's three-valued logic, with two
    deviations that are deliberately preserved (see Notes).

    Parameters
    ----------
    left, right : array-like or scalar
        Operands containing ``True``, ``False`` or missing values
        (``np.nan``, ``pd.NA``, ``None``). Both are converted to object
        arrays and broadcast against each other.
    op : function
        ``operator.and_``, ``operator.or_``, ``operator.xor`` or a reflected
        variant. Dispatch is on ``op.__name__``: ``and_``/``rand_``,
        ``or_``/``ror_``, ``xor``/``rxor``.

    Returns
    -------
    np.ndarray
        ``bool`` dtype when no element of the result is missing, otherwise
        ``object`` dtype with ``np.nan`` at the missing positions.

    Raises
    ------
    ValueError
        If ``op.__name__`` is not one of the supported names.

    Notes
    -----
    * AND: the result is missing where one operand is missing and the
      other is ``True`` or missing; ``False & NA`` is ``False``.
    * OR: the result is missing only where *both* operands are missing;
      ``NA | True`` is ``True`` and ``NA | False`` is ``False``. Strict
      Kleene logic would give NA for ``NA | False``.
    * XOR: the result is missing wherever either operand is missing.
    * If every element of both operands is missing (this includes empty
      input), AND returns an all-``False`` bool array instead of all-NA.
    """
    op_name = op.__name__
    is_and = op_name in {"and_", "rand_"}
    is_or = op_name in {"or_", "ror_"}
    is_xor = op_name in {"xor", "rxor"}
    if not (is_and or is_or or is_xor):
        raise ValueError(f"Unsupported operator: {op.__name__}")

    left = np.asarray(left, dtype=object)
    right = np.asarray(right, dtype=object)

    # Masks for NA values
    left_mask = isna(left)
    right_mask = isna(right)

    # Boolean arrays that are False at NA positions
    lvalues = safe_is_true(left)
    rvalues = safe_is_true(right)

    if is_and:
        # Special case: all-NA inputs (e.g. dfa & dfa) give all False, bool dtype
        # NOTE(review): this departs from Kleene (NA & NA is NA); kept as-is.
        if left_mask.all() and right_mask.all():
            return np.zeros(np.broadcast(left, right).shape, dtype=bool)
        res_values = lvalues & rvalues
        mask = (
            (left_mask & rvalues) | (right_mask & lvalues) | (left_mask & right_mask)
        )
    elif is_or:
        # lvalues/rvalues are already False at NA positions, so NA | True is
        # True and NA | False is False without further adjustment.
        # NOTE(review): Kleene gives NA for NA | False; kept as-is.
        res_values = lvalues | rvalues
        mask = left_mask & right_mask
    else:
        res_values = lvalues ^ rvalues
        mask = left_mask | right_mask

    # Ufuncs return numpy scalars for 0-d operands; normalise to arrays so
    # the masking below works for every input shape.
    res_values = np.asarray(res_values, dtype=bool)
    mask = np.asarray(mask, dtype=bool)

    # Insert np.nan only if needed, so fully known results keep bool dtype
    if not mask.any():
        return res_values

    result = res_values.astype(object)
    result[mask] = np.nan
    return result
534+
535+
392536
def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
393537
"""
394538
Evaluate a logical operation `|`, `&`, or `^`.
@@ -406,6 +550,10 @@ def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
406550
ndarray or ExtensionArray
407551
"""
408552

553+
bothAreBoolArrays = is_nullable_bool(left) and is_nullable_bool(right)
554+
if bothAreBoolArrays:
555+
return alignOutputWithKleene(left, right, op)
556+
409557
def fill_bool(x, left=None):
410558
# if `left` is specifically not-boolean, we do not cast to bool
411559
if x.dtype.kind in "cfO":

pandas/tests/frame/test_logical_ops.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,31 @@ class TestDataFrameLogicalOperators:
2424
[True, False, np.nan],
2525
[True, False, True],
2626
operator.and_,
27-
[True, False, False],
27+
[
28+
True,
29+
False,
30+
np.nan,
31+
], # changed last element, Kleene AND with Unknown gives Unknown
2832
),
2933
(
3034
[True, False, True],
3135
[True, False, np.nan],
3236
operator.and_,
33-
[True, False, False],
37+
[
38+
True,
39+
False,
40+
np.nan,
41+
], # changed last element, Kleene AND with Unknown gives Unknown
3442
),
3543
(
3644
[True, False, np.nan],
3745
[True, False, True],
3846
operator.or_,
39-
[True, False, False],
47+
[
48+
True,
49+
False,
50+
True,
51+
], # change last element, Kleene Or of True and unknown gives true
4052
),
4153
(
4254
[True, False, True],
@@ -157,16 +169,21 @@ def _check_unary_op(op):
157169
def test_logical_with_nas(self):
158170
d = DataFrame({"a": [np.nan, False], "b": [True, True]})
159171

160-
# GH4947
161-
# bool comparisons should return bool
172+
# In Kleene logic:
173+
# NaN OR True → True
174+
# False OR True → True
162175
result = d["a"] | d["b"]
163-
expected = Series([False, True])
176+
expected = Series([True, True])
164177
tm.assert_series_equal(result, expected)
165178

166-
# GH4604, automatic casting here
179+
# If we explicitly fill NaN with False first:
180+
# row0: False OR True → True
181+
# row1: False OR True → True
167182
result = d["a"].fillna(False) | d["b"]
168183
expected = Series([True, True])
169184
tm.assert_series_equal(result, expected)
185+
186+
# Redundant check (same as above)
170187
result = d["a"].fillna(False) | d["b"]
171188
expected = Series([True, True])
172189
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)