Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,9 +488,13 @@ def _reduce(
arr = pc.or_kleene(nas, pc.not_equal(self._pa_array, ""))
else:
arr = pc.not_equal(self._pa_array, "")
return ArrowExtensionArray(arr)._reduce(
result = ArrowExtensionArray(arr)._reduce(
name, skipna=skipna, keepdims=keepdims, **kwargs
)
if keepdims:
# ArrowExtensionArray will return a length-1 bool[pyarrow] array
return result.astype(np.bool_)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a chance of having NAs here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

other than this, LGTM

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think not. With the default of skipna=True, missing values are skipped, and AFAIK the result is never NaN (for an empty input, `all` returns True and `any` returns False).
And for the case of skipna=False, we are still in the "NaN-semantics" branch, where we do not use Kleene logic but instead treat missing values as True.

return result

result = self._reduce_calc(name, skipna=skipna, keepdims=keepdims, **kwargs)
if name in ("argmin", "argmax") and isinstance(result, pa.Array):
Expand Down
20 changes: 9 additions & 11 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,7 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
HAS_PYARROW,
PYPY,
)
from pandas.compat import PYPY

from pandas import (
Categorical,
Expand Down Expand Up @@ -299,18 +294,21 @@ def test_nbytes(self):
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
assert cat.nbytes == exp

@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
)
def test_memory_usage(self):
def test_memory_usage(self, using_infer_string):
cat = Categorical([1, 2, 3])

# .categories is an index, so we include the hashtable
assert 0 < cat.nbytes <= cat.memory_usage()
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)

cat = Categorical(["foo", "foo", "bar"])
assert cat.memory_usage(deep=True) > cat.nbytes
if using_infer_string:
if cat.categories.dtype.storage == "python":
assert cat.memory_usage(deep=True) > cat.nbytes
else:
assert cat.memory_usage(deep=True) >= cat.nbytes
else:
assert cat.memory_usage(deep=True) > cat.nbytes

if not PYPY:
# sys.getsizeof will call the .memory_usage with
Expand Down
7 changes: 1 addition & 6 deletions pandas/tests/arrays/integer/test_reduction.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -104,10 +102,7 @@ def test_groupby_reductions(op, expected):
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
],
)
def test_mixed_reductions(request, op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string and HAS_PYARROW:
# TODO(infer_string) inconsistent result type
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
def test_mixed_reductions(op, expected):
df = DataFrame(
{
"A": ["a", "b", "b"],
Expand Down