# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True


from libc.math cimport sqrt
from lightning.impl.dataset_fast cimport RowDataset

cimport numpy as np
import numpy as np

from .kernels_fast cimport _fast_anova_kernel_grad
from .loss_fast cimport LossFunction


np.import_array()


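# AdaGrad with an l2 penalty admits "lazy" updates: a coordinate whose
# feature is absent from the current row receives only the regularization
# shrinkage, which is a constant multiplicative factor per step as long as
# its squared-gradient norm is unchanged. sync() therefore applies the dt
# missed steps in one shot by raising that factor to the power dt, instead
# of touching every coordinate at every iteration.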
cdef inline void sync(double* param,
                      unsigned int* last_seen,
                      double grad_norm,
                      double learning_rate,
                      double beta,
                      unsigned int t,
                      unsigned int* dt):

    cdef double sq, correction
    dt[0] = t - last_seen[0]  # dt is written through a pointer so the caller
                              # can reuse it; a local would work equally well.
    if dt[0] > 0:
        sq = sqrt(grad_norm)
        correction = sq / (learning_rate * beta + sq + 1e-6)
        param[0] *= correction ** dt[0]
        last_seen[0] = t


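# One eager AdaGrad step for a single coordinate, with an implicit
# (proximal) l2 penalty of strength beta. Writing G for the accumulated
# squared-gradient norm and g = lp * update, the step solves
#     argmin_q  g*q + (sqrt(G) / (2*lr)) * (q - p)**2 + (beta / 2) * q**2
# whose closed form is the three lines of arithmetic below.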
cdef inline void ada_update(double* param,
                            double* grad_norm,
                            unsigned int* last_seen,
                            double update,
                            double lp,
                            double learning_rate,
                            double beta,
                            unsigned int t):
    cdef double sq
    update *= lp

    grad_norm[0] += update ** 2
    sq = sqrt(grad_norm[0])

    # p <- (p * sq - lr * update) / (lr * beta + sq)
    param[0] *= sq
    param[0] -= learning_rate * update
    param[0] /= 1e-6 + sq + learning_rate * beta
    last_seen[0] = t + 1


def _fast_fm_adagrad(self,
                     double[::1] w,
                     double[:, ::1] P not None,
                     RowDataset X,
                     double[::1] y not None,
                     unsigned int degree,
                     double alpha,
                     double beta,
                     bint fit_linear,
                     LossFunction loss,
                     unsigned int max_iter,
                     double learning_rate,
                     callback,
                     int n_calls):
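    """AdaGrad epochs for a degree-`degree` factorization machine.

    Updates w (linear weights, l2 strength alpha) and P (factor weights,
    l2 strength beta) in place. `callback(self, it)` is invoked every
    n_calls epochs; returning a non-None value stops training early.
    """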

    cdef Py_ssize_t n_samples = X.get_n_samples()
    cdef Py_ssize_t n_components = P.shape[0]
    cdef Py_ssize_t n_features = P.shape[1]

    cdef bint has_callback = callback is not None

    cdef unsigned int it, t, dt
    cdef Py_ssize_t i, s, j, jj

    cdef double y_pred, lp

    # data pointers
    cdef double* data
    cdef int* indices
    cdef int n_nz

    # working memory and DP tables for the ANOVA kernel
    cdef double[:, ::1] P_grad_data
    cdef double[::1, :] A
    cdef double[::1, :] Ad

    # to avoid reallocating at every iteration, we allocate more than enough
    P_grad_data = np.empty_like(P)
    A = np.empty((n_features + 1, degree + 1), order='f')
    Ad = np.empty_like(A, order='f')

    # adagrad bookkeeping: accumulated squared gradients per coordinate,
    # and the step index at which each coordinate was last synced
    cdef double[::1] w_grad_norms
    cdef double[:, ::1] P_grad_norms
    cdef unsigned int[::1] w_last_seen
    cdef unsigned int[:, ::1] P_last_seen
    w_grad_norms = np.zeros_like(w)
    P_grad_norms = np.zeros_like(P)
    w_last_seen = np.zeros_like(w, dtype=np.uint32)
    P_last_seen = np.zeros_like(P, dtype=np.uint32)

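    # t counts individual sample updates across all epochs; it is the
    # clock against which the last_seen timestamps are compared in sync().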
    t = 0
    for it in range(max_iter):

        for i in range(n_samples):
            X.get_row_ptr(i, &indices, &data, &n_nz)

            y_pred = 0

            # catch up: apply the regularization steps each active feature
            # missed since it was last seen, so its parameters are current
            if fit_linear:
                for jj in range(n_nz):
                    j = indices[jj]
                    sync(&w[j], &w_last_seen[j], w_grad_norms[j],
                         learning_rate, alpha, t, &dt)

            for s in range(n_components):
                for jj in range(n_nz):
                    j = indices[jj]
                    sync(&P[s, j], &P_last_seen[s, j], P_grad_norms[s, j],
                         learning_rate, beta, t, &dt)

            # compute predictions
            if fit_linear:
                for jj in range(n_nz):
                    j = indices[jj]
                    y_pred += w[j] * data[jj]

            # each call adds one component's ANOVA kernel value to y_pred
            # and fills P_grad_data[s, :n_nz] with its gradient wrt P[s, :]
            for s in range(n_components):
                y_pred += _fast_anova_kernel_grad(A,
                                                  Ad,
                                                  P,
                                                  s,
                                                  indices,
                                                  data,
                                                  n_nz,
                                                  degree,
                                                  P_grad_data)

            # update
            lp = -loss.dloss(y[i], y_pred)  # scales all gradients below

            if fit_linear:
                for jj in range(n_nz):
                    j = indices[jj]
                    ada_update(&w[j],
                               &w_grad_norms[j],
                               &w_last_seen[j],
                               data[jj],  # derivative wrt w[j] is x[j]
                               lp,
                               learning_rate,
                               alpha,
                               t)

            for s in range(n_components):
                for jj in range(n_nz):
                    j = indices[jj]
                    ada_update(&P[s, j],
                               &P_grad_norms[s, j],
                               &P_last_seen[s, j],
                               P_grad_data[s, jj],
                               lp,
                               learning_rate,
                               beta,
                               t)
            t += 1
        # end for n_samples

        if has_callback and it % n_calls == 0:
            ret = callback(self, it)
            if ret is not None:
                break
    # end for max_iter

    # finalize: flush pending lazy regularization so every parameter,
    # including those of features absent late in training, is current
    for j in range(n_features):
        sync(&w[j], &w_last_seen[j], w_grad_norms[j], learning_rate, alpha, t,
             &dt)
    for s in range(n_components):
        for j in range(n_features):
            sync(&P[s, j], &P_last_seen[s, j], P_grad_norms[s, j],
                 learning_rate, beta, t, &dt)
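

# Rough usage sketch (illustrative assumptions only, not this module's
# API): the caller wraps X in a lightning RowDataset and supplies a
# LossFunction from .loss_fast, plus an optional callback.
#
#     from lightning.impl.dataset_fast import get_dataset
#     w = np.zeros(n_features)                            # linear weights
#     P = 0.01 * rng.standard_normal((n_components, n_features))
#     _fast_fm_adagrad(est, w, P, get_dataset(X, order='c'), y,
#                      degree=2, alpha=1e-5, beta=1e-5, fit_linear=True,
#                      loss=loss, max_iter=100, learning_rate=0.01,
#                      callback=None, n_calls=1)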