@@ -12,15 +12,15 @@ from sklearn.utils._typedefs cimport int32_t
12
12
13
13
14
14
@final
15
- cdef unsigned int _bsum(
16
- bint* x,
17
- unsigned int n,
15
+ cdef int _bsum(
16
+ const bint* x,
17
+ int n,
18
18
) noexcept nogil:
19
19
""" Computes the sum of the vector of bool elements.
20
20
"""
21
21
cdef:
22
- unsigned int total = 0
23
- unsigned int i
22
+ int total = 0
23
+ int i
24
24
for i in range (n):
25
25
total += x[i]
26
26
return total
@@ -29,7 +29,7 @@ cdef unsigned int _bsum(
29
29
@final
30
30
cdef int _iamax(
31
31
int n,
32
- const floating * x,
32
+ const floating* x,
33
33
int incx,
34
34
) noexcept nogil:
35
35
"""
@@ -45,114 +45,90 @@ cdef int _iamax(
45
45
46
46
@final
47
47
cdef bint _normv(
48
- floating[::1 ] x, # IN/OUT
48
+ const floating* x, # IN/OUT
49
+ int n_samples, # IN
49
50
) noexcept nogil:
50
51
"""
51
52
Vector normalization by Euclidean norm.
52
53
x (IN) : (n_samples,) Vector.
53
54
x (OUT) : (n_samples,) Normalized vector.
55
+ n_samples (IN) : number of samples.
54
56
Return: True if the norm of x is zero (constant vector, to be masked), False otherwise.
55
57
"""
56
58
cdef:
57
- unsigned int n_samples = x.shape[0 ]
58
59
floating x_norm
59
60
60
- x_norm = _nrm2(n_samples, & x[ 0 ] , 1 )
61
+ x_norm = _nrm2(n_samples, x , 1 )
61
62
if x_norm == 0.0 :
62
63
return True
63
64
x_norm = 1.0 / x_norm
64
- _scal(n_samples, x_norm, & x[ 0 ] , 1 )
65
+ _scal(n_samples, x_norm, x , 1 )
65
66
return False
66
67
67
68
68
- @final
69
- cdef void _normm(
70
- floating[::1 , :] X, # IN/OUT
71
- bint* m, # IN/OUT
72
- ) noexcept nogil:
73
- """
74
- Matrix column-wise normalization by Euclidean norm.
75
- X (IN) : (n_samples, nx) Matrix.
76
- X (OUT) : (n_samples, nx) Column-wise normalized matrix.
77
- m (IN): (n_features,) Mask contains only false.
78
- m (OUT): (n_features,) Mask the constant vectors.
79
- """
80
- cdef:
81
- unsigned int n_samples = X.shape[0 ]
82
- unsigned int nx = X.shape[1 ]
83
- floating x_norm
84
- unsigned int j
85
-
86
- # X = X/norm(X)
87
- for j in range (nx):
88
- x_norm = _nrm2(n_samples, & X[0 , j], 1 )
89
- if x_norm == 0.0 :
90
- m[j] = True
91
- else :
92
- x_norm = 1.0 / x_norm
93
- _scal(n_samples, x_norm, & X[0 , j], 1 )
94
-
95
-
96
69
@final
97
70
cdef floating _sscvm(
98
- const floating[::1 ] w, # IN
99
- const floating[::1 , :] V, # IN
71
+ const floating* w, # IN
72
+ const floating* V, # IN
73
+ int n_samples, # IN
74
+ int n_targets, # IN
100
75
) noexcept nogil:
101
76
"""
102
77
Sum of squared correlation coefficients.
103
78
w : (n_samples,) Centred orthogonalized feature vector.
104
- V : (n_samples, nv) Centred orthogonalized target matrix.
79
+ V : (n_samples, n_targets) Centred orthogonalized target matrix.
80
+ n_samples (IN) : number of samples.
81
+ n_targets (IN) : number of columns of V.
105
82
r2 : Sum of squared correlation coefficients between w and the columns of V,
106
83
i.e. the coefficient of determination between w and V.
107
84
"""
108
85
cdef:
109
- unsigned int n_samples = V.shape[0 ]
110
- unsigned int nv = V.shape[1 ]
111
86
# r : (n_targets,) r**2 contains the pairwise h-correlation or eta-cosine, where
112
87
# rj means the h-correlation or eta-cosine between w and vj.
113
- floating* r = < floating* > malloc(sizeof(floating) * nv )
88
+ floating* r = < floating* > malloc(sizeof(floating) * n_targets )
114
89
floating r2
115
90
116
91
# r = w*V (w is treated as (1, n_samples))
117
- _gemm(ColMajor, NoTrans, NoTrans, 1 , nv , n_samples, 1.0 ,
118
- & w[ 0 ] , 1 , & V[ 0 , 0 ] , n_samples, 0.0 , r, 1 )
92
+ _gemm(ColMajor, NoTrans, NoTrans, 1 , n_targets , n_samples, 1.0 ,
93
+ w , 1 , V , n_samples, 0.0 , r, 1 )
119
94
# r2 = r*r.T
120
95
121
- r2 = _dot(nv , r, 1 , r, 1 )
96
+ r2 = _dot(n_targets , r, 1 , r, 1 )
122
97
123
98
free(r)
124
99
return r2
125
100
126
101
127
102
@final
128
103
cdef void _mgsvv(
129
- const floating[::1 ] w, # IN
130
- floating[::1 ] x, # IN/OUT
104
+ const floating* w, # IN
105
+ const floating* x, # IN/OUT
106
+ int n_samples, # IN
131
107
) noexcept nogil:
132
108
"""
133
109
Modified Gram-Schmidt process. x = x - w*w.T*x
134
110
w : (n_samples,) Centred orthonormal selected feature vector.
135
111
x (IN) : (n_samples,) Centred remaining feature vector.
136
112
x (OUT) : (n_samples,) Centred remaining feature vector, which is orthogonal to w.
113
+ n_samples (IN) : number of samples.
137
114
"""
138
115
cdef:
139
- unsigned int n_samples = x.shape[0 ]
140
116
floating r
141
117
142
118
# r = w.T*x
143
- r = _dot(n_samples, & w[ 0 ] , 1 , & x[ 0 ] , 1 )
119
+ r = _dot(n_samples, w , 1 , x , 1 )
144
120
# x = x - w*r
145
- _axpy(n_samples, - r, & w[ 0 ] , 1 , & x[ 0 ] , 1 )
121
+ _axpy(n_samples, - r, w , 1 , x , 1 )
146
122
147
123
148
124
@final
149
125
cpdef int _forward_search(
150
126
floating[::1 , :] X, # IN/OUT
151
127
floating[::1 , :] V, # IN
152
- const unsigned int t, # IN
153
- const floating tol, # IN
154
- const unsigned int num_threads, # IN
155
- const unsigned int verbose, # IN
128
+ int t, # IN
129
+ floating tol, # IN
130
+ int num_threads, # IN
131
+ int verbose, # IN
156
132
int32_t[::1 ] indices, # OUT
157
133
floating[::1 ] scores, # OUT
158
134
) except - 1 nogil:
@@ -168,13 +144,14 @@ cpdef int _forward_search(
168
144
scores: (t,) The h-correlation/eta-cosine of selected features.
169
145
"""
170
146
cdef:
171
- unsigned int n_samples = X.shape[0 ]
147
+ int n_samples = X.shape[0 ]
148
+ int n_targets = V.shape[1 ]
172
149
# OpenMP (in Windows) requires signed integral for prange
173
- int j, n_features = X.shape[1 ]
150
+ int n_features = X.shape[1 ]
174
151
floating* r2 = < floating* > malloc(sizeof(floating) * n_features)
175
152
bint* mask = < bint* > malloc(sizeof(bint) * n_features)
176
153
floating g, ssc = 0.0
177
- unsigned int i
154
+ int i, j
178
155
int index = - 1
179
156
180
157
memset(& r2[0 ], 0 , n_features * sizeof(floating))
@@ -183,38 +160,39 @@ cpdef int _forward_search(
183
160
for i in range (t):
184
161
if i == 0 :
185
162
# Preprocessing
186
- _normm(X, mask)
163
+ for j in range (n_features):
164
+ mask[j] = _normv(& X[0 , j], n_samples)
187
165
else :
188
166
mask[index] = True
189
167
r2[index] = 0
190
168
# Make X orthogonal to X[:, indices[i-1]]
191
169
for j in prange(n_features, nogil = True , schedule = " static" ,
192
170
chunksize = 1 , num_threads = num_threads):
193
171
if not mask[j]:
194
- _mgsvv(X[: , index], X[: , j])
195
- _normv(X[: , j])
172
+ _mgsvv(& X[ 0 , index], & X[ 0 , j], n_samples )
173
+ _normv(& X[ 0 , j], n_samples )
196
174
# Linear dependence check
197
175
g = _dot(n_samples, & X[0 , index], 1 , & X[0 , j], 1 )
198
176
if abs (g) > tol:
199
177
mask[j] = True
200
178
r2[j] = 0
201
179
202
- if _bsum(mask, n_features) == n_features:
180
+ if _bsum(& mask[ 0 ] , n_features) == n_features:
203
181
raise RuntimeError (
204
182
" No candidate feature can be found to form a non-singular "
205
183
f" matrix with the {i} selected features."
206
184
)
207
185
if indices[i] != - 1 :
208
186
index = indices[i]
209
- scores[i] = _sscvm(X[: , index], V )
187
+ scores[i] = _sscvm(& X[ 0 , index], & V[ 0 , 0 ], n_samples, n_targets )
210
188
else :
211
189
# Score for X
212
190
for j in range (n_features):
213
191
if not mask[j]:
214
- r2[j] = _sscvm(X[: , j], V )
192
+ r2[j] = _sscvm(& X[ 0 , j], & V[ 0 , 0 ], n_samples, n_targets )
215
193
216
194
# Find max scores and update indices, X, mask, and scores
217
- index = _iamax(n_features, r2 , 1 )
195
+ index = _iamax(n_features, & r2[ 0 ] , 1 )
218
196
indices[i] = index
219
197
scores[i] = r2[index]
220
198
@@ -223,6 +201,9 @@ cpdef int _forward_search(
223
201
with gil:
224
202
print (f" Progress: {i+1}/{t}, SSC: {ssc:.5f}" , end= " \r " )
225
203
204
+ if verbose == 1 :
205
+ with gil:
206
+ print ()
226
207
free(r2)
227
208
free(mask)
228
209
return 0
0 commit comments