# cython: language_level=3
# cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True


from libc.math cimport sqrt
from lightning.impl.dataset_fast cimport RowDataset

cimport numpy as np
import numpy as np

from .kernels_fast cimport _fast_anova_kernel_grad
from .loss_fast cimport LossFunction


np.import_array()


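# Lazy ("just-in-time") synchronization: a coordinate whose feature did not
# appear in the rows processed since `last_seen` missed only regularization
# shrinkage, never a data-driven gradient. Each missed step multiplies the
# coordinate by the constant factor sqrt(G) / (lr * beta + sqrt(G) + 1e-6)
# (G is unchanged while the gradient is zero), so the `dt` skipped steps can
# be caught up in closed form by raising that factor to the power `dt`.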
cdef inline void sync(double* param,
                      unsigned int* last_seen,
                      double grad_norm,
                      double learning_rate,
                      double beta,
                      unsigned int t,
                      unsigned int* dt):

    cdef double sq, correction

    dt[0] = t - last_seen[0]  # dt could be local. is that efficient?
    if dt[0] > 0:
        sq = sqrt(grad_norm)
        correction = sq / (learning_rate * beta + sq + 1e-6)
        param[0] *= correction ** dt[0]
        last_seen[0] = t


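# Single-coordinate AdaGrad step with the L2 penalty folded into the
# denominator. With G the accumulated squared gradient and sq = sqrt(G),
# the assignments below implement
#     p <- (p * sq - lr * update) / (lr * beta + sq + 1e-6),
# where `beta` is the regularization strength passed in by the caller
# (alpha for the linear weights, beta for the factors).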
cdef inline void ada_update(double* param,
                            double* grad_norm,
                            unsigned int* last_seen,
                            double update,
                            double lp,
                            double learning_rate,
                            double beta,
                            unsigned int t):

    cdef double sq

    update *= lp

    grad_norm[0] += update ** 2
    sq = sqrt(grad_norm[0])

    # p <- (p * sq - lr * update) / (lr * beta + sq)
    param[0] *= sq
    param[0] -= learning_rate * update
    param[0] /= 1e-6 + sq + learning_rate * beta
    last_seen[0] = t + 1


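# AdaGrad training loop for a factorization machine with ANOVA kernels of the
# given degree. For each sample the routine
#   1. lazily catches up the coordinates touched by the row (`sync`),
#   2. computes the prediction (linear term plus one ANOVA kernel per
#      component), and
#   3. applies AdaGrad updates to those same coordinates (`ada_update`).
# Coordinates of features absent from the row are left untouched, which keeps
# the per-sample cost proportional to the row's number of nonzeros.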
def _fast_fm_adagrad(self,
                     double[::1] w,
                     double[:, ::1] P not None,
                     RowDataset X,
                     double[::1] y not None,
                     unsigned int degree,
                     double alpha,
                     double beta,
                     bint fit_linear,
                     LossFunction loss,
                     unsigned int max_iter,
                     double learning_rate,
                     callback,
                     int n_calls):

    cdef Py_ssize_t n_samples = X.get_n_samples()
    cdef Py_ssize_t n_components = P.shape[0]
    cdef Py_ssize_t n_features = P.shape[1]

    cdef bint has_callback = callback is not None

    cdef unsigned int it, t, dt
    cdef Py_ssize_t i, s, j, jj

    cdef double y_pred, update, lp

    # data pointers
    cdef double* data
    cdef int* indices
    cdef int n_nz

    # working memory and DP tables
    cdef double[:, ::1] P_grad_data
    cdef double[::1, :] A
    cdef double[::1, :] Ad

    # to avoid reallocating at every iteration, we allocate more than enough

    P_grad_data = np.empty_like(P)
    A = np.empty((n_features + 1, degree + 1), order='f')
    Ad = np.empty_like(A, order='f')
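    # A and Ad are the DP tables consumed by _fast_anova_kernel_grad: one row
    # per feature plus a boundary row, and one column per kernel degree from 0
    # up to `degree` (hence the +1 in both dimensions).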

    # adagrad bookkeeping
    cdef double[::1] w_grad_norms
    cdef double[:, ::1] P_grad_norms
    cdef unsigned int[::1] w_last_seen
    cdef unsigned int[:, ::1] P_last_seen
    w_grad_norms = np.zeros_like(w)
    P_grad_norms = np.zeros_like(P)
    w_last_seen = np.zeros_like(w, dtype=np.uint32)
    P_last_seen = np.zeros_like(P, dtype=np.uint32)

    t = 0
    for it in range(max_iter):

        for i in range(n_samples):
            X.get_row_ptr(i, &indices, &data, &n_nz)

            y_pred = 0

            # catch up
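            # (lazily sync only the coordinates of this row's nonzero
            #  features; all other coordinates keep their stale values
            #  until they are next needed)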
            if fit_linear:
                for jj in range(n_nz):
                    j = indices[jj]
                    sync(&w[j], &w_last_seen[j], w_grad_norms[j],
                         learning_rate, alpha, t, &dt)

            for s in range(n_components):
                for jj in range(n_nz):
                    j = indices[jj]
                    sync(&P[s, j], &P_last_seen[s, j], P_grad_norms[s, j],
                         learning_rate, beta, t, &dt)

            # compute predictions
            if fit_linear:
                for jj in range(n_nz):
                    j = indices[jj]
                    y_pred += w[j] * data[jj]

            for s in range(n_components):
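                # adds component s's ANOVA kernel value to y_pred and fills
                # P_grad_data[s, :n_nz] with the derivative of y_pred w.r.t.
                # P[s, j] for the row's nonzero features j; the update loop
                # below reuses these derivatives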
                y_pred += _fast_anova_kernel_grad(A,
                                                  Ad,
                                                  P,
                                                  s,
                                                  indices,
                                                  data,
                                                  n_nz,
                                                  degree,
                                                  P_grad_data)

            # update
            lp = -loss.dloss(y[i], y_pred)

            if fit_linear:
                for jj in range(n_nz):
                    j = indices[jj]
                    ada_update(&w[j],
                               &w_grad_norms[j],
                               &w_last_seen[j],
                               data[jj],  # derivative wrt w[j] is x[j]
                               lp,
                               learning_rate,
                               alpha,
                               t)

            for s in range(n_components):
                for jj in range(n_nz):
                    j = indices[jj]
                    ada_update(&P[s, j],
                               &P_grad_norms[s, j],
                               &P_last_seen[s, j],
                               P_grad_data[s, jj],
                               lp,
                               learning_rate,
                               beta,
                               t)
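            # t counts individual samples (not epochs), so last_seen and dt
            # are measured in per-sample steps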
            t += 1
        # end for n_samples

        if has_callback and it % n_calls == 0:
            ret = callback(self, it)
            if ret is not None:
                break
    # end for max_iter

    # finalize
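    # apply the shrinkage skipped by the lazy scheme to every coordinate,
    # recently seen or not, so the returned w and P are fully up to date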
    for j in range(n_features):
        sync(&w[j], &w_last_seen[j], w_grad_norms[j], learning_rate, alpha, t,
             &dt)
    for s in range(n_components):
        for j in range(n_features):
            sync(&P[s, j], &P_last_seen[s, j], P_grad_norms[s, j],
                 learning_rate, beta, t, &dt)