Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,27 @@ Or via conda-forge:
Getting Started
---------------
>>> from fastcan import FastCan
>>> X = [[1, 0], [0, 1]]
>>> y = [1, 0]
>>> FastCan(verbose=0).fit(X, y).get_support()
array([ True, False])
>>> X = [[ 0.87, -1.34, 0.31 ],
... [-2.79, -0.02, -0.85 ],
... [-1.34, -0.48, -2.55 ],
... [ 1.92, 1.48, 0.65 ]]
>>> y = [0, 1, 0, 1]
>>> selector = FastCan(n_features_to_select=2, verbose=0).fit(X, y)
>>> selector.get_support()
array([ True, True, False])
>>> selector.get_support(indices=True) # Sorted indices
array([0, 1])
>>> selector.indices_ # Indices in selection order
array([1, 0], dtype=int32)
>>> selector.scores_ # Scores for selected features in selection order
array([0.64276838, 0.09498243])
>>> # Here Feature 2 must be included
>>> selector = FastCan(n_features_to_select=2, indices_include=[2], verbose=0).fit(X, y)
>>> # We can find the feature which is useful when working with Feature 2
>>> selector.indices_
array([2, 1], dtype=int32)
>>> selector.scores_
array([0.16632562, 0.50544788])


Citation
Expand Down
113 changes: 47 additions & 66 deletions fastcan/_cancorr_fast.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ from sklearn.utils._typedefs cimport int32_t


@final
cdef unsigned int _bsum(
bint* x,
unsigned int n,
cdef int _bsum(
const bint* x,
int n,
) noexcept nogil:
"""Computes the sum of the vector of bool elements.
"""
cdef:
unsigned int total = 0
unsigned int i
int total = 0
int i
for i in range(n):
total += x[i]
return total
Expand All @@ -29,7 +29,7 @@ cdef unsigned int _bsum(
@final
cdef int _iamax(
int n,
const floating *x,
const floating* x,
int incx,
) noexcept nogil:
"""
Expand All @@ -45,114 +45,90 @@ cdef int _iamax(

@final
cdef bint _normv(
floating[::1] x, # IN/OUT
const floating* x, # IN/OUT
int n_samples, # IN
) noexcept nogil:
"""
Vector normalization by Euclidean norm.
x (IN) : (n_samples,) Vector.
x (OUT) : (n_samples,) Normalized vector.
n_samples (IN) : number of samples.
Return: Mask the constant vector.
"""
cdef:
unsigned int n_samples = x.shape[0]
floating x_norm

x_norm = _nrm2(n_samples, &x[0], 1)
x_norm = _nrm2(n_samples, x, 1)
if x_norm == 0.0:
return True
x_norm = 1.0/x_norm
_scal(n_samples, x_norm, &x[0], 1)
_scal(n_samples, x_norm, x, 1)
return False


@final
cdef void _normm(
floating[::1, :] X, # IN/OUT
bint* m, # IN/OUT
) noexcept nogil:
"""
Matrix column-wise normalization by Euclidean norm.
X (IN) : (n_samples, nx) Matrix.
X (OUT) : (n_samples, nx) Column-wise normalized matrix.
m (IN): (n_features,) Mask contains only false.
m (OUT): (n_features,) Mask the constant vectors.
"""
cdef:
unsigned int n_samples = X.shape[0]
unsigned int nx = X.shape[1]
floating x_norm
unsigned int j

# X = X/norm(X)
for j in range(nx):
x_norm = _nrm2(n_samples, &X[0, j], 1)
if x_norm == 0.0:
m[j] = True
else:
x_norm = 1.0/x_norm
_scal(n_samples, x_norm, &X[0, j], 1)


@final
cdef floating _sscvm(
const floating[::1] w, # IN
const floating[::1, :] V, # IN
const floating* w, # IN
const floating* V, # IN
int n_samples, # IN
int n_targets, # IN
) noexcept nogil:
"""
Sum of squared correlation coefficients.
w : (n_samples,) Centred orthogonalized feature vector.
V : (n_samples, nv) Centred orthogonalized target matrix.
V : (n_samples, n_targets) Centred orthogonalized target matrix.
n_samples (IN) : number of samples.
n_targets (IN) : column number of V
r2 : (nw,) Sum of squared correlation coefficients, where r2i means the
coefficient of determination between wi and V.
"""
cdef:
unsigned int n_samples = V.shape[0]
unsigned int nv = V.shape[1]
# R : (nw * nv) R**2 contains the pairwise h-correlation or eta-cosine, where
# rij means the h-correlation or eta-cosine between wi and vj.
floating* r = <floating*> malloc(sizeof(floating) * nv)
floating* r = <floating*> malloc(sizeof(floating) * n_targets)
floating r2

# r = w*V (w is treated as (1, n_samples))
_gemm(ColMajor, NoTrans, NoTrans, 1, nv, n_samples, 1.0,
&w[0], 1, &V[0, 0], n_samples, 0.0, r, 1)
_gemm(ColMajor, NoTrans, NoTrans, 1, n_targets, n_samples, 1.0,
w, 1, V, n_samples, 0.0, r, 1)
# r2 = r*r.T

r2 = _dot(nv, r, 1, r, 1)
r2 = _dot(n_targets, r, 1, r, 1)

free(r)
return r2


@final
cdef void _mgsvv(
const floating[::1] w, # IN
floating[::1] x, # IN/OUT
const floating* w, # IN
const floating* x, # IN/OUT
int n_samples, # IN
) noexcept nogil:
"""
Modified Gram-Schmidt process. x = x - w*w.T*x
w : (n_samples,) Centred orthonormal selected feature vector.
x (IN) : (n_samples,) Centred remaining feature vector.
x (OUT) : (n_samples,) Centred remaining feature vector, which is orthogonal to w.
n_samples (IN) : number of samples.
"""
cdef:
unsigned int n_samples = x.shape[0]
floating r

# r = w.T*x
r = _dot(n_samples, &w[0], 1, &x[0], 1)
r = _dot(n_samples, w, 1, x, 1)
# x = x - w*r
_axpy(n_samples, -r, &w[0], 1, &x[0], 1)
_axpy(n_samples, -r, w, 1, x, 1)


@final
cpdef int _forward_search(
floating[::1, :] X, # IN/OUT
floating[::1, :] V, # IN
const unsigned int t, # IN
const floating tol, # IN
const unsigned int num_threads, # IN
const unsigned int verbose, # IN
int t, # IN
floating tol, # IN
int num_threads, # IN
int verbose, # IN
int32_t[::1] indices, # OUT
floating[::1] scores, # OUT
) except -1 nogil:
Expand All @@ -168,13 +144,14 @@ cpdef int _forward_search(
scores: (t,) The h-correlation/eta-cosine of selected features.
"""
cdef:
unsigned int n_samples = X.shape[0]
int n_samples = X.shape[0]
int n_targets = V.shape[1]
# OpenMP (in Windows) requires signed integral for prange
int j, n_features = X.shape[1]
int n_features = X.shape[1]
floating* r2 = <floating*> malloc(sizeof(floating) * n_features)
bint* mask = <bint*> malloc(sizeof(bint) * n_features)
floating g, ssc = 0.0
unsigned int i
int i, j
int index = -1

memset(&r2[0], 0, n_features * sizeof(floating))
Expand All @@ -183,38 +160,39 @@ cpdef int _forward_search(
for i in range(t):
if i == 0:
# Preprocessing
_normm(X, mask)
for j in range(n_features):
mask[j] = _normv(&X[0, j], n_samples)
else:
mask[index] = True
r2[index] = 0
# Make X orthogonal to X[:, indices[i-1]]
for j in prange(n_features, nogil=True, schedule="static",
chunksize=1, num_threads=num_threads):
if not mask[j]:
_mgsvv(X[:, index], X[:, j])
_normv(X[:, j])
_mgsvv(&X[0, index], &X[0, j], n_samples)
_normv(&X[0, j], n_samples)
# Linear dependence check
g = _dot(n_samples, &X[0, index], 1, &X[0, j], 1)
if abs(g) > tol:
mask[j] = True
r2[j] = 0

if _bsum(mask, n_features) == n_features:
if _bsum(&mask[0], n_features) == n_features:
raise RuntimeError(
"No candidate feature can be found to form a non-singular "
f"matrix with the {i} selected features."
)
if indices[i] != -1:
index = indices[i]
scores[i] = _sscvm(X[:, index], V)
scores[i] = _sscvm(&X[0, index], &V[0, 0], n_samples, n_targets)
else:
# Score for X
for j in range(n_features):
if not mask[j]:
r2[j] = _sscvm(X[:, j], V)
r2[j] = _sscvm(&X[0, j], &V[0, 0], n_samples, n_targets)

# Find max scores and update indices, X, mask, and scores
index = _iamax(n_features, r2, 1)
index = _iamax(n_features, &r2[0], 1)
indices[i] = index
scores[i] = r2[index]

Expand All @@ -223,6 +201,9 @@ cpdef int _forward_search(
with gil:
print(f"Progress: {i+1}/{t}, SSC: {ssc:.5f}", end="\r")

if verbose == 1:
with gil:
print()
free(r2)
free(mask)
return 0
12 changes: 4 additions & 8 deletions fastcan/_fastcan.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,10 @@ class FastCan(SelectorMixin, BaseEstimator):
Examples
--------
>>> from fastcan import FastCan
>>> X = [[ 0.87, -1.34, 0.31 ],
... [-2.79, -0.02, -0.85 ],
... [-1.34, -0.48, -2.55 ],
... [ 1.92, 1.48, 0.65 ]]
>>> y = [0, 1, 0, 1]
>>> selector = FastCan(n_features_to_select=2, verbose=0).fit(X, y)
>>> selector.get_support()
array([ True, True, False])
>>> X = [[1, 0], [0, 1]]
>>> y = [1, 0]
>>> FastCan(verbose=0).fit(X, y).get_support()
array([ True, False])
"""

_parameter_constraints: dict = {
Expand Down
Loading