Skip to content

Commit 3c717b7

Browse files
authored
Only copy numpy arrays when necessary (#500)
* Don't copy unless necessary but handle non-contiguous arrays
* Add warning
* changelog
* Prepare for release
* Don't use retired runner
1 parent 31cd1d8 commit 3c717b7

File tree

7 files changed

+44
-16
lines changed

7 files changed

+44
-16
lines changed

.github/workflows/build-wheels.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
runs-on: ${{ matrix.os }}
1818
strategy:
1919
matrix:
20-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022, macos-13, macos-14]
20+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2022, macos-14, macos-15]
2121

2222
steps:
2323
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

CHANGELOG.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ Changelog
88
=========
99

1010

11+
4.1.4 - 2025-12-16
12+
------------------
13+
14+
**Other changes:**
15+
16+
- :func:`tabmat.from_df` now avoids unnecessary copies of dense arrays, but still ensures that the results are contiguous (C or F order).
17+
- We now use the v2 API of ``narwhals`` for data frame handling.
18+
19+
1120
4.1.3 - 2025-11-12
1221
------------------
1322

src/tabmat/categorical_matrix.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ def matvec(
509509
is_int = np.issubdtype(other.dtype, np.signedinteger)
510510

511511
if is_int:
512-
other_m = other.astype(float) # type: ignore
512+
other_m: np.ndarray = other.astype(float)
513513
else:
514514
other_m = other
515515

src/tabmat/constructor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def from_df(
182182
if dense_dfidx:
183183
matrices.append(
184184
DenseMatrix(
185-
df[:, dense_dfidx].to_numpy().astype(dtype),
185+
df[:, dense_dfidx].to_numpy().astype(dtype, copy=False),
186186
column_names=np.asarray(df.columns)[dense_dfidx],
187187
term_names=np.asarray(df.columns)[dense_dfidx],
188188
)

src/tabmat/dense_matrix.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import textwrap
2+
import warnings
23
from typing import Optional, Union
34

45
import numpy as np
@@ -43,6 +44,19 @@ def __init__(self, input_array, column_names=None, term_names=None):
4344
elif input_array.ndim > 2:
4445
raise ValueError("Input array must be 1- or 2-dimensional")
4546

47+
# Ensure array is contiguous (C or F order) for Cython operations
48+
# Only copy if necessary
49+
if (
50+
not input_array.flags["C_CONTIGUOUS"]
51+
and not input_array.flags["F_CONTIGUOUS"]
52+
):
53+
warnings.warn(
54+
"Input array is not contiguous; making a copy.",
55+
UserWarning,
56+
stacklevel=2,
57+
)
58+
input_array = np.asfortranarray(input_array)
59+
4660
self._array = input_array
4761
width = self._array.shape[1]
4862

src/tabmat/formula.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def _encode_numerical(self, values, metadata, encoder_state, spec, drop_rows):
9292
if drop_rows:
9393
values = values.drop(index=values.index[drop_rows])
9494
if isinstance(values, pd.Series):
95-
values = values.to_numpy().astype(self.dtype)
95+
values = values.to_numpy().astype(self.dtype, copy=False)
9696
if (values != 0).mean() <= self.sparse_threshold:
9797
return _InteractableSparseVector(sps.csc_matrix(values[:, np.newaxis]))
9898
else:

tests/test_fast_sandwich.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,25 +64,30 @@ def test_fast_sandwich_dense():
6464

6565

6666
def test_dense_sandwich_on_non_contiguous():
67-
"""Non-regression test for #208"""
67+
"""Non-regression test for #208
68+
69+
DenseMatrix now ensures that the wrapped array is contiguous (C or F order),
70+
so a non-contiguous input is copied into a contiguous array (with a warning).
71+
"""
6872
rng = np.random.default_rng(seed=123)
6973
X = rng.standard_normal(size=(100, 20))
7074

71-
# Xd wraps a not-contiguous array.
72-
Xd = DenseMatrix(X[:, :10])
73-
Xs = SparseMatrix(csc_matrix(X[:, 10:]))
74-
Xm = SplitMatrix([Xd, Xs])
75+
# Column slicing creates a non-contiguous array, but DenseMatrix
76+
# automatically makes it contiguous (copying only if necessary).
77+
non_contiguous_array = X[:, :10]
78+
assert not non_contiguous_array.flags["C_CONTIGUOUS"]
79+
assert not non_contiguous_array.flags["F_CONTIGUOUS"]
7580

76-
# Making the sandwich product fail.
77-
with pytest.raises(Exception, match="The matrix X is not contiguous"):
78-
Xm.sandwich(np.ones(X.shape[0]))
81+
Xd = DenseMatrix(non_contiguous_array)
82+
# The internal array should now be contiguous
83+
assert Xd.A.flags["C_CONTIGUOUS"] or Xd.A.flags["F_CONTIGUOUS"]
7984

80-
# Xd wraps a copy, which makes the data contiguous.
81-
Xd = DenseMatrix(X[:, :10].copy())
85+
Xs = SparseMatrix(csc_matrix(X[:, 10:]))
8286
Xm = SplitMatrix([Xd, Xs])
8387

84-
# The sandwich product works without problem here.
85-
Xm.sandwich(np.ones(X.shape[0]))
88+
# The sandwich product should work without problem
89+
result = Xm.sandwich(np.ones(X.shape[0]))
90+
assert result is not None
8691

8792

8893
def check(A, d, cols):

0 commit comments

Comments
 (0)