Skip to content
This repository was archived by the owner on Dec 6, 2023. It is now read-only.

Commit 8a4b483

Browse files
committed
ENH: refactor: replace naked mallocs with cython arrays
While slightly slower, allocation only takes place once, outside of critical loops. The advantage of not having to worry about leaks (since cython arrays are refcounted) is worth it. Use cvarray instead of np.empty
1 parent e8cfb65 commit 8a4b483

File tree

7 files changed

+3866
-3526
lines changed

7 files changed

+3866
-3526
lines changed

polylearn/cd_direct_fast.cpp

Lines changed: 470 additions & 430 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

polylearn/cd_direct_fast.pyx

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
# Author: Vlad Niculae
88
# License: BSD
99

10-
from libc.stdlib cimport malloc, free
1110
from libc.math cimport fabs
11+
from cython.view cimport array
1212

1313
from lightning.impl.dataset_fast cimport ColumnDataset
1414

@@ -19,7 +19,7 @@ from .cd_linear_fast cimport _cd_linear_epoch
1919
cdef void _precompute(ColumnDataset X,
2020
double[:, :, ::1] P,
2121
Py_ssize_t order,
22-
double* out,
22+
double[:, ::1] out,
2323
Py_ssize_t s,
2424
unsigned int degree):
2525

@@ -32,15 +32,17 @@ cdef void _precompute(ColumnDataset X,
3232
cdef int n_nz
3333

3434
cdef Py_ssize_t i, j, ii
35+
cdef unsigned int d
36+
cdef double tmp
3537

3638
for i in range(n_samples):
37-
out[i] = 0
38-
39+
out[degree - 1, i] = 0
40+
3941
for j in range(n_features):
4042
X.get_column_ptr(j, &indices, &data, &n_nz)
4143
for ii in range(n_nz):
4244
i = indices[ii]
43-
out[i] += (data[ii] * P[order, s, j]) ** degree
45+
out[degree - 1, i] += (data[ii] * P[order, s, j]) ** degree
4446

4547

4648
cdef inline double _update(int* indices,
@@ -50,12 +52,11 @@ cdef inline double _update(int* indices,
5052
double[:] y,
5153
double[:] y_pred,
5254
LossFunction loss,
53-
double* d1,
54-
double* d2,
5555
unsigned int degree,
5656
double lam,
5757
double beta,
58-
double* cache_kp):
58+
double[:, ::1] D,
59+
double[:] cache_kp):
5960

6061
cdef double l1_reg = 2 * beta * fabs(lam)
6162

@@ -70,10 +71,10 @@ cdef inline double _update(int* indices,
7071
i = indices[ii]
7172

7273
if degree == 2:
73-
kp = d1[i] - p_js * data[ii]
74-
elif degree == 3:
75-
kp = 0.5 * (d1[i] ** 2 - d2[i])
76-
kp -= p_js * data[ii] * d1[i]
74+
kp = D[0, i] - p_js * data[ii]
75+
else: # degree == 3:
76+
kp = 0.5 * (D[0, i] ** 2 - D[1, i])
77+
kp -= p_js * data[ii] * D[0, i]
7778
kp += p_js ** 2 * data[ii] ** 2
7879

7980
kp *= lam * data[ii]
@@ -97,12 +98,11 @@ cdef inline double _cd_direct_epoch(double[:, :, ::1] P,
9798
double[:] y,
9899
double[:] y_pred,
99100
double[:] lams,
100-
double* d1,
101-
double* d2,
102101
unsigned int degree,
103102
double beta,
104103
LossFunction loss,
105-
double* cache_kp):
104+
double[:, ::1] D,
105+
double[:] cache_kp):
106106

107107
cdef Py_ssize_t s, j
108108
cdef double p_old, update, offset
@@ -118,9 +118,9 @@ cdef inline double _cd_direct_epoch(double[:, :, ::1] P,
118118
for s in range(n_components):
119119

120120
# initialize the cached ds for this s
121-
_precompute(X, P, order, d1, s, 1)
121+
_precompute(X, P, order, D, s, 1)
122122
if degree == 3:
123-
_precompute(X, P, order, d2, s, 2)
123+
_precompute(X, P, order, D, s, 2)
124124

125125
for j in range(n_features):
126126

@@ -129,7 +129,7 @@ cdef inline double _cd_direct_epoch(double[:, :, ::1] P,
129129
# compute coordinate update
130130
p_old = P[order, s, j]
131131
update = _update(indices, data, n_nz, p_old, y, y_pred,
132-
loss, d1, d2, degree, lams[s], beta, cache_kp)
132+
loss, degree, lams[s], beta, D, cache_kp)
133133
P[order, s, j] -= update
134134
sum_viol += fabs(update)
135135

@@ -138,9 +138,10 @@ cdef inline double _cd_direct_epoch(double[:, :, ::1] P,
138138
i = indices[ii]
139139

140140
if degree == 3:
141-
d2[i] -= (p_old ** 2 - P[order, s, j] ** 2) * data[ii] ** 2
141+
D[1, i] -= ((p_old ** 2 - P[order, s, j] ** 2) *
142+
data[ii] ** 2)
142143

143-
d1[i] -= update * data[ii]
144+
D[0, i] -= update * data[ii]
144145
y_pred[i] -= update * cache_kp[ii]
145146
return sum_viol
146147

@@ -169,11 +170,8 @@ def _cd_direct_ho(double[:, :, ::1] P not None,
169170
cdef bint converged = False
170171

171172
# precomputed values
172-
cdef double *d1 = <double *> malloc(n_samples * sizeof(double))
173-
cdef double *d2
174-
if degree == 3:
175-
d2 = <double *> malloc(n_samples * sizeof(double))
176-
cdef double *cache_kp = <double *> malloc(n_samples * sizeof(double))
173+
cdef double[:, ::1] D = array((degree - 1, n_samples), sizeof(double), 'd')
174+
cdef double[:] cache_kp = array((n_samples,), sizeof(double), 'd')
177175

178176
for it in range(max_iter):
179177
viol = 0
@@ -182,11 +180,11 @@ def _cd_direct_ho(double[:, :, ::1] P not None,
182180
viol += _cd_linear_epoch(w, X, y, y_pred, col_norm_sq, alpha, loss)
183181

184182
if fit_lower and degree == 3: # fit degree 2. Will be looped later.
185-
viol += _cd_direct_epoch(P, 1, X, y, y_pred, lams, d1, d2,
186-
2, beta, loss, cache_kp)
183+
viol += _cd_direct_epoch(P, 1, X, y, y_pred, lams, 2, beta, loss,
184+
D, cache_kp)
187185

188-
viol += _cd_direct_epoch(P, 0, X, y, y_pred, lams, d1, d2,
189-
degree, beta, loss, cache_kp)
186+
viol += _cd_direct_epoch(P, 0, X, y, y_pred, lams, degree, beta, loss,
187+
D, cache_kp)
190188

191189
if verbose:
192190
print("Iteration", it + 1, "violation sum", viol)
@@ -197,10 +195,4 @@ def _cd_direct_ho(double[:, :, ::1] P not None,
197195
converged = True
198196
break
199197

200-
# Free up cache
201-
free(d1)
202-
free(cache_kp)
203-
if degree == 3:
204-
free(d2)
205-
206198
return converged

0 commit comments

Comments
 (0)