Skip to content

Commit e3ebe18

Browse files
Merge pull request #13 from MatthewSZhang/slice
MNT remove memoryview slicing
2 parents 114b0dc + 8f69afe commit e3ebe18

File tree

4 files changed: +444 −436 lines changed

README.rst

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,27 @@ Or via conda-forge:
5757
Getting Started
5858
---------------
5959
>>> from fastcan import FastCan
60-
>>> X = [[1, 0], [0, 1]]
61-
>>> y = [1, 0]
62-
>>> FastCan(verbose=0).fit(X, y).get_support()
63-
array([ True, False])
60+
>>> X = [[ 0.87, -1.34, 0.31 ],
61+
... [-2.79, -0.02, -0.85 ],
62+
... [-1.34, -0.48, -2.55 ],
63+
... [ 1.92, 1.48, 0.65 ]]
64+
>>> y = [0, 1, 0, 1]
65+
>>> selector = FastCan(n_features_to_select=2, verbose=0).fit(X, y)
66+
>>> selector.get_support()
67+
array([ True, True, False])
68+
>>> selector.get_support(indices=True) # Sorted indices
69+
array([0, 1])
70+
>>> selector.indices_ # Indices in selection order
71+
array([1, 0], dtype=int32)
72+
>>> selector.scores_ # Scores for selected features in selection order
73+
array([0.64276838, 0.09498243])
74+
>>> # Here Feature 2 must be included
75+
>>> selector = FastCan(n_features_to_select=2, indices_include=[2], verbose=0).fit(X, y)
76+
>>> # We can find the feature which is useful when working with Feature 2
77+
>>> selector.indices_
78+
array([2, 1], dtype=int32)
79+
>>> selector.scores_
80+
array([0.16632562, 0.50544788])
6481

6582

6683
Citation

fastcan/_cancorr_fast.pyx

Lines changed: 47 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ from sklearn.utils._typedefs cimport int32_t
1212

1313

1414
@final
15-
cdef unsigned int _bsum(
16-
bint* x,
17-
unsigned int n,
15+
cdef int _bsum(
16+
const bint* x,
17+
int n,
1818
) noexcept nogil:
1919
"""Computes the sum of the vector of bool elements.
2020
"""
2121
cdef:
22-
unsigned int total = 0
23-
unsigned int i
22+
int total = 0
23+
int i
2424
for i in range(n):
2525
total += x[i]
2626
return total
@@ -29,7 +29,7 @@ cdef unsigned int _bsum(
2929
@final
3030
cdef int _iamax(
3131
int n,
32-
const floating *x,
32+
const floating* x,
3333
int incx,
3434
) noexcept nogil:
3535
"""
@@ -45,114 +45,90 @@ cdef int _iamax(
4545

4646
@final
4747
cdef bint _normv(
48-
floating[::1] x, # IN/OUT
48+
const floating* x, # IN/OUT
49+
int n_samples, # IN
4950
) noexcept nogil:
5051
"""
5152
Vector normalization by Euclidean norm.
5253
x (IN) : (n_samples,) Vector.
5354
x (OUT) : (n_samples,) Normalized vector.
55+
n_samples (IN) : number of samples.
5456
Return: Mask the constant vector.
5557
"""
5658
cdef:
57-
unsigned int n_samples = x.shape[0]
5859
floating x_norm
5960

60-
x_norm = _nrm2(n_samples, &x[0], 1)
61+
x_norm = _nrm2(n_samples, x, 1)
6162
if x_norm == 0.0:
6263
return True
6364
x_norm = 1.0/x_norm
64-
_scal(n_samples, x_norm, &x[0], 1)
65+
_scal(n_samples, x_norm, x, 1)
6566
return False
6667

6768

68-
@final
69-
cdef void _normm(
70-
floating[::1, :] X, # IN/OUT
71-
bint* m, # IN/OUT
72-
) noexcept nogil:
73-
"""
74-
Matrix column-wise normalization by Euclidean norm.
75-
X (IN) : (n_samples, nx) Matrix.
76-
X (OUT) : (n_samples, nx) Column-wise normalized matrix.
77-
m (IN): (n_features,) Mask contains only false.
78-
m (OUT): (n_features,) Mask the constant vectors.
79-
"""
80-
cdef:
81-
unsigned int n_samples = X.shape[0]
82-
unsigned int nx = X.shape[1]
83-
floating x_norm
84-
unsigned int j
85-
86-
# X = X/norm(X)
87-
for j in range(nx):
88-
x_norm = _nrm2(n_samples, &X[0, j], 1)
89-
if x_norm == 0.0:
90-
m[j] = True
91-
else:
92-
x_norm = 1.0/x_norm
93-
_scal(n_samples, x_norm, &X[0, j], 1)
94-
95-
9669
@final
9770
cdef floating _sscvm(
98-
const floating[::1] w, # IN
99-
const floating[::1, :] V, # IN
71+
const floating* w, # IN
72+
const floating* V, # IN
73+
int n_samples, # IN
74+
int n_targets, # IN
10075
) noexcept nogil:
10176
"""
10277
Sum of squared correlation coefficients.
10378
w : (n_samples,) Centred orthogonalized feature vector.
104-
V : (n_samples, nv) Centred orthogonalized target matrix.
79+
V : (n_samples, n_targets) Centred orthogonalized target matrix.
80+
n_samples (IN) : number of samples.
81+
n_targets (IN) : column number of V
10582
r2 : (nw,) Sum of squared correlation coefficients, where r2i means the
10683
coefficient of determination between wi and V.
10784
"""
10885
cdef:
109-
unsigned int n_samples = V.shape[0]
110-
unsigned int nv = V.shape[1]
11186
# R : (nw * nv) R**2 contains the pairwise h-correlation or eta-cosine, where
11287
# rij means the h-correlation or eta-cosine between wi and vj.
113-
floating* r = <floating*> malloc(sizeof(floating) * nv)
88+
floating* r = <floating*> malloc(sizeof(floating) * n_targets)
11489
floating r2
11590

11691
# r = w*V (w is treated as (1, n_samples))
117-
_gemm(ColMajor, NoTrans, NoTrans, 1, nv, n_samples, 1.0,
118-
&w[0], 1, &V[0, 0], n_samples, 0.0, r, 1)
92+
_gemm(ColMajor, NoTrans, NoTrans, 1, n_targets, n_samples, 1.0,
93+
w, 1, V, n_samples, 0.0, r, 1)
11994
# r2 = r*r.T
12095

121-
r2 = _dot(nv, r, 1, r, 1)
96+
r2 = _dot(n_targets, r, 1, r, 1)
12297

12398
free(r)
12499
return r2
125100

126101

127102
@final
128103
cdef void _mgsvv(
129-
const floating[::1] w, # IN
130-
floating[::1] x, # IN/OUT
104+
const floating* w, # IN
105+
const floating* x, # IN/OUT
106+
int n_samples, # IN
131107
) noexcept nogil:
132108
"""
133109
Modified Gram-Schmidt process. x = x - w*w.T*x
134110
w : (n_samples,) Centred orthonormal selected feature vector.
135111
x (IN) : (n_samples,) Centred remaining feature vector.
136112
x (OUT) : (n_samples,) Centred remaining feature vector, which is orthogonal to w.
113+
n_samples (IN) : number of samples.
137114
"""
138115
cdef:
139-
unsigned int n_samples = x.shape[0]
140116
floating r
141117

142118
# r = w.T*x
143-
r = _dot(n_samples, &w[0], 1, &x[0], 1)
119+
r = _dot(n_samples, w, 1, x, 1)
144120
# x = x - w*r
145-
_axpy(n_samples, -r, &w[0], 1, &x[0], 1)
121+
_axpy(n_samples, -r, w, 1, x, 1)
146122

147123

148124
@final
149125
cpdef int _forward_search(
150126
floating[::1, :] X, # IN/OUT
151127
floating[::1, :] V, # IN
152-
const unsigned int t, # IN
153-
const floating tol, # IN
154-
const unsigned int num_threads, # IN
155-
const unsigned int verbose, # IN
128+
int t, # IN
129+
floating tol, # IN
130+
int num_threads, # IN
131+
int verbose, # IN
156132
int32_t[::1] indices, # OUT
157133
floating[::1] scores, # OUT
158134
) except -1 nogil:
@@ -168,13 +144,14 @@ cpdef int _forward_search(
168144
scores: (t,) The h-correlation/eta-cosine of selected features.
169145
"""
170146
cdef:
171-
unsigned int n_samples = X.shape[0]
147+
int n_samples = X.shape[0]
148+
int n_targets = V.shape[1]
172149
# OpenMP (in Windows) requires signed integral for prange
173-
int j, n_features = X.shape[1]
150+
int n_features = X.shape[1]
174151
floating* r2 = <floating*> malloc(sizeof(floating) * n_features)
175152
bint* mask = <bint*> malloc(sizeof(bint) * n_features)
176153
floating g, ssc = 0.0
177-
unsigned int i
154+
int i, j
178155
int index = -1
179156

180157
memset(&r2[0], 0, n_features * sizeof(floating))
@@ -183,38 +160,39 @@ cpdef int _forward_search(
183160
for i in range(t):
184161
if i == 0:
185162
# Preprocessing
186-
_normm(X, mask)
163+
for j in range(n_features):
164+
mask[j] = _normv(&X[0, j], n_samples)
187165
else:
188166
mask[index] = True
189167
r2[index] = 0
190168
# Make X orthogonal to X[:, indices[i-1]]
191169
for j in prange(n_features, nogil=True, schedule="static",
192170
chunksize=1, num_threads=num_threads):
193171
if not mask[j]:
194-
_mgsvv(X[:, index], X[:, j])
195-
_normv(X[:, j])
172+
_mgsvv(&X[0, index], &X[0, j], n_samples)
173+
_normv(&X[0, j], n_samples)
196174
# Linear dependence check
197175
g = _dot(n_samples, &X[0, index], 1, &X[0, j], 1)
198176
if abs(g) > tol:
199177
mask[j] = True
200178
r2[j] = 0
201179

202-
if _bsum(mask, n_features) == n_features:
180+
if _bsum(&mask[0], n_features) == n_features:
203181
raise RuntimeError(
204182
"No candidate feature can be found to form a non-singular "
205183
f"matrix with the {i} selected features."
206184
)
207185
if indices[i] != -1:
208186
index = indices[i]
209-
scores[i] = _sscvm(X[:, index], V)
187+
scores[i] = _sscvm(&X[0, index], &V[0, 0], n_samples, n_targets)
210188
else:
211189
# Score for X
212190
for j in range(n_features):
213191
if not mask[j]:
214-
r2[j] = _sscvm(X[:, j], V)
192+
r2[j] = _sscvm(&X[0, j], &V[0, 0], n_samples, n_targets)
215193

216194
# Find max scores and update indices, X, mask, and scores
217-
index = _iamax(n_features, r2, 1)
195+
index = _iamax(n_features, &r2[0], 1)
218196
indices[i] = index
219197
scores[i] = r2[index]
220198

@@ -223,6 +201,9 @@ cpdef int _forward_search(
223201
with gil:
224202
print(f"Progress: {i+1}/{t}, SSC: {ssc:.5f}", end="\r")
225203

204+
if verbose == 1:
205+
with gil:
206+
print()
226207
free(r2)
227208
free(mask)
228209
return 0

fastcan/_fastcan.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,14 +77,10 @@ class FastCan(SelectorMixin, BaseEstimator):
7777
Examples
7878
--------
7979
>>> from fastcan import FastCan
80-
>>> X = [[ 0.87, -1.34, 0.31 ],
81-
... [-2.79, -0.02, -0.85 ],
82-
... [-1.34, -0.48, -2.55 ],
83-
... [ 1.92, 1.48, 0.65 ]]
84-
>>> y = [0, 1, 0, 1]
85-
>>> selector = FastCan(n_features_to_select=2, verbose=0).fit(X, y)
86-
>>> selector.get_support()
87-
array([ True, True, False])
80+
>>> X = [[1, 0], [0, 1]]
81+
>>> y = [1, 0]
82+
>>> FastCan(verbose=0).fit(X, y).get_support()
83+
array([ True, False])
8884
"""
8985

9086
_parameter_constraints: dict = {

0 commit comments

Comments (0)