Skip to content

REF: Move methods in core/reshape/util.py to where they are used #59172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 57 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,6 @@ def from_product(
(2, 'purple')],
names=['number', 'color'])
"""
from pandas.core.reshape.util import cartesian_product

if not is_list_like(iterables):
raise TypeError("Input must be a list / sequence of iterables.")
Expand Down Expand Up @@ -4105,3 +4104,60 @@ def _require_listlike(level, arr, arrname: str):
if not is_list_like(arr) or not is_list_like(arr[0]):
raise TypeError(f"{arrname} must be list of lists-like")
return level, arr


def cartesian_product(X: list[np.ndarray]) -> list[np.ndarray]:
    """
    Numpy version of itertools.product.

    Sometimes faster (for large inputs)...

    Parameters
    ----------
    X : list-like of list-likes
        The factors of the product, in order.

    Returns
    -------
    product : list of ndarrays
        One array per factor in ``X``. Every array has length equal to the
        product of the factor lengths; the first factor varies slowest and
        the last factor varies fastest. An empty ``X`` yields ``[]``.

    Raises
    ------
    TypeError
        If ``X`` or any of its elements is not list-like.
    ValueError
        If the running product of the factor lengths becomes negative,
        i.e. it overflowed ``np.intp``.

    See Also
    --------
    itertools.product : Cartesian product of input iterables. Equivalent to
        nested for-loops.

    Examples
    --------
    >>> cartesian_product([list("ABC"), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]
    """
    msg = "Input must be a list-like of list-likes"
    if not is_list_like(X):
        raise TypeError(msg)
    for x in X:
        if not is_list_like(x):
            raise TypeError(msg)

    if len(X) == 0:
        return []

    lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
    cumprodX = np.cumprod(lenX)

    # A negative running product can only come from integer wrap-around.
    if np.any(cumprodX < 0):
        raise ValueError("Product space too large to allocate arrays!")

    # a[i] is the product of the lengths of all factors before i, which is
    # how many times the repeated block of factor i has to be tiled.
    a = np.roll(cumprodX, 1)
    a[0] = 1

    if cumprodX[-1] != 0:
        # b[i] is the product of the lengths of all factors after i, which is
        # how many times each element of factor i is repeated in a row.
        # Floor division is exact here (every running product divides the
        # total) and keeps the counts as integers instead of floats.
        b = cumprodX[-1] // cumprodX
    else:
        # if any factor is empty, the cartesian product is empty
        b = np.zeros_like(cumprodX)

    return [np.tile(np.repeat(x, b[i]), a[i]) for i, x in enumerate(X)]
4 changes: 2 additions & 2 deletions pandas/core/reshape/melt.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import pandas.core.algorithms as algos
from pandas.core.indexes.api import MultiIndex
from pandas.core.reshape.concat import concat
from pandas.core.reshape.util import tile_compat
from pandas.core.tools.numeric import to_numeric

if TYPE_CHECKING:
Expand Down Expand Up @@ -266,7 +265,8 @@ def melt(
result = frame._constructor(mdata, columns=mcolumns)

if not ignore_index:
result.index = tile_compat(frame.index, num_cols_adjusted)
taker = np.tile(np.arange(len(frame)), num_cols_adjusted)
result.index = frame.index.take(taker)

return result

Expand Down
9 changes: 2 additions & 7 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
get_objs_combined_axis,
)
from pandas.core.reshape.concat import concat
from pandas.core.reshape.util import cartesian_product
from pandas.core.series import Series

if TYPE_CHECKING:
Expand Down Expand Up @@ -358,15 +357,11 @@ def __internal_pivot_table(

if not dropna:
if isinstance(table.index, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.index.levels), names=table.index.names
)
m = MultiIndex.from_product(table.index.levels, names=table.index.names)
table = table.reindex(m, axis=0, fill_value=fill_value)

if isinstance(table.columns, MultiIndex):
m = MultiIndex.from_arrays(
cartesian_product(table.columns.levels), names=table.columns.names
)
m = MultiIndex.from_product(table.columns.levels, names=table.columns.names)
table = table.reindex(m, axis=1, fill_value=fill_value)

if sort is True and isinstance(table, ABCDataFrame):
Expand Down
85 changes: 0 additions & 85 deletions pandas/core/reshape/util.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
date_range,
)
import pandas._testing as tm
from pandas.core.reshape.util import cartesian_product
from pandas.core.indexes.multi import cartesian_product


class TestCartesianProduct:
Expand All @@ -28,22 +28,6 @@ def test_datetimeindex(self):
tm.assert_index_equal(result1, expected1)
tm.assert_index_equal(result2, expected2)

def test_tzaware_retained(self):
    """
    Check the first factor of a product built from a tz-aware date range.

    The first array returned by ``cartesian_product`` for a two-period
    "US/Pacific" ``date_range`` and a two-element integer array must be
    index-equal to ``x.repeat(2)``.
    """
    x = date_range("2000-01-01", periods=2, tz="US/Pacific")
    y = np.array([3, 4])
    # Only result1 is asserted on below; result2 is unpacked but unused.
    result1, result2 = cartesian_product([x, y])

    expected = x.repeat(2)
    tm.assert_index_equal(result1, expected)

def test_tzaware_retained_categorical(self):
    """
    Same check as the tz-aware case, with the date range cast to "category".

    The first array returned by ``cartesian_product`` must be index-equal
    to ``x.repeat(2)``, where ``x`` is the category-typed input.
    """
    x = date_range("2000-01-01", periods=2, tz="US/Pacific").astype("category")
    y = np.array([3, 4])
    # Only result1 is asserted on below; result2 is unpacked but unused.
    result1, result2 = cartesian_product([x, y])

    expected = x.repeat(2)
    tm.assert_index_equal(result1, expected)

@pytest.mark.parametrize("x, y", [[[], []], [[0, 1], []], [[], ["a", "b", "c"]]])
def test_empty(self, x, y):
# product of empty factors
Expand Down