@@ -26,17 +26,19 @@ float Digamma(float x) {
 }
 
 __global__ void EstepKernel(
-  const int* cols, const int* indptr,
+  const int* cols, const int* indptr, const bool* vali,
   const int num_cols, const int num_indptr,
   const int num_words, const int num_topics, const int num_iters,
   float* gamma, float* new_gamma, float* phi,
   float* alpha, float* beta,
-  float* grad_alpha, float* new_beta) {
+  float* grad_alpha, float* new_beta, float* train_losses, float* vali_losses) {
 
   // storage for block
   float* _gamma = gamma + num_topics * blockIdx.x;
   float* _new_gamma = new_gamma + num_topics * blockIdx.x;
   float* _phi = phi + num_topics * blockIdx.x;
+  float* _grad_alpha = grad_alpha + num_topics * blockIdx.x;
+
 
   for (int i = blockIdx.x; i < num_indptr; i += gridDim.x) {
     int beg = indptr[i], end = indptr[i + 1];
@@ -56,18 +58,34 @@ __global__ void EstepKernel(
       // compute phi from gamma
       for (int k = beg; k < end; ++k) {
         const int w = cols[k];
+        const bool _vali = vali[k];
+
         // compute phi
-        for (int l = threadIdx.x; l < num_topics; l += blockDim.x)
-          _phi[l] = beta[w * num_topics + l] * expf(Digamma(_gamma[l]));
-        __syncthreads();
-
-        // normalize phi and add it to new gamma and new beta
-        float phi_sum = ReduceSum(_phi, num_topics);
-        for (int l = threadIdx.x; l < num_topics; l += blockDim.x) {
-          _phi[l] /= phi_sum;
-          _new_gamma[l] += _phi[l];
-          if (j + 1 == num_iters)
-            new_beta[w * num_topics + l] += phi[l];
+        if (not _vali or j + 1 == num_iters) {
+          for (int l = threadIdx.x; l < num_topics; l += blockDim.x)
+            _phi[l] = beta[w * num_topics + l] * expf(Digamma(_gamma[l]));
+          __syncthreads();
+
+          // normalize phi and add it to new gamma and new beta
+          float phi_sum = ReduceSum(_phi, num_topics);
+          for (int l = threadIdx.x; l < num_topics; l += blockDim.x) {
+            _phi[l] /= phi_sum;
+            if (not _vali) _new_gamma[l] += _phi[l];
+            if (j + 1 == num_iters) {
+              if (not _vali) new_beta[w * num_topics + l] += _phi[l];
+              _phi[l] *= beta[w * num_topics + l];
+            }
+          }
+          __syncthreads();
+        }
+        if (j + 1 == num_iters) {
+          float p = ReduceSum(_phi, num_topics);
+          if (threadIdx.x == 0) {
+            if (_vali)
+              vali_losses[blockIdx.x] += logf(p + EPS);
+            else
+              train_losses[blockIdx.x] += logf(p + EPS);
+          }
         }
         __syncthreads();
       }
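
A note on what the new loss code computes (my reading of the diff; the sketch below is illustrative, not code from this commit): on the final iteration, phi is recomputed from the current gamma, normalized, and then re-weighted by beta, so `p = ReduceSum(_phi, num_topics)` is effectively the sum over topics of phi[l] * beta[w][l], the model's probability of word `w` under the fitted variational posterior. Validation tokens enter this path only when `j + 1 == num_iters`, so they never touch `_new_gamma` or `new_beta`; they only add to `vali_losses`. A sequential host-side equivalent, with an assumed host `Digamma` and an assumed `EPS` value:

```cpp
// Sequential sketch of the per-token loss term added above (illustrative).
#include <cmath>
#include <vector>

// Host stand-in for the kernel's Digamma (assumed; standard asymptotic form).
static float Digamma(float x) {
  float r = 0.0f;
  for (; x < 6.0f; ++x) r -= 1.0f / x;        // psi(x) = psi(x + 1) - 1/x
  float inv = 1.0f / x, inv2 = inv * inv;
  return r + logf(x) - 0.5f * inv - inv2 * (1.0f / 12.0f - inv2 / 120.0f);
}

static const float EPS = 1e-10f;              // assumed value, mirrors the kernel's EPS

// log p(w | gamma): what one token adds to train_losses / vali_losses.
float TokenLogProb(const float* beta_w,       // row beta + w * num_topics
                   const float* gamma, int num_topics) {
  std::vector<float> phi(num_topics);
  float phi_sum = 0.0f;
  for (int l = 0; l < num_topics; ++l) {      // _phi[l] = beta * expf(Digamma(gamma))
    phi[l] = beta_w[l] * expf(Digamma(gamma[l]));
    phi_sum += phi[l];
  }
  float p = 0.0f;
  for (int l = 0; l < num_topics; ++l)        // normalize, then _phi[l] *= beta
    p += (phi[l] / phi_sum) * beta_w[l];
  return logf(p + EPS);                       // ReduceSum + logf(p + EPS) in the kernel
}
```
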
@@ -79,7 +97,8 @@ __global__ void EstepKernel(
     }
     float gamma_sum = ReduceSum(_gamma, num_topics);
     for (int j = threadIdx.x; j < num_topics; j += blockDim.x)
-      grad_alpha[j] += (Digamma(_gamma[j]) - Digamma(gamma_sum));
+      _grad_alpha[j] += (Digamma(_gamma[j]) - Digamma(gamma_sum));
+
     __syncthreads();
   }
 }
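
Since `train_losses` and `vali_losses` are indexed by `blockIdx.x`, each block owns a private accumulator slot, which is the same trick this commit applies to `grad_alpha` via the new per-block `_grad_alpha` offset (removing the cross-block race on the old `grad_alpha[j] +=`). A minimal host-side sketch of how a caller might allocate, zero, and reduce these buffers (launch shape and names here are assumptions, not from this commit):

```cpp
// Host-side sketch (assumed usage, not from this commit): one loss slot
// per block, zeroed before launch and summed on the host afterwards.
#include <cstdio>
#include <vector>
#include <cuda_runtime.h>

int main() {
  const int num_blocks = 128;                  // assumed launch width
  float *train_losses = nullptr, *vali_losses = nullptr;
  cudaMalloc(&train_losses, num_blocks * sizeof(float));
  cudaMalloc(&vali_losses, num_blocks * sizeof(float));
  cudaMemset(train_losses, 0, num_blocks * sizeof(float));
  cudaMemset(vali_losses, 0, num_blocks * sizeof(float));

  // EstepKernel<<<num_blocks, num_threads>>>(..., train_losses, vali_losses);

  std::vector<float> h_train(num_blocks), h_vali(num_blocks);
  cudaMemcpy(h_train.data(), train_losses, num_blocks * sizeof(float),
             cudaMemcpyDeviceToHost);
  cudaMemcpy(h_vali.data(), vali_losses, num_blocks * sizeof(float),
             cudaMemcpyDeviceToHost);
  float train_loss = 0.0f, vali_loss = 0.0f;
  for (int i = 0; i < num_blocks; ++i) {
    train_loss += h_train[i];
    vali_loss += h_vali[i];
  }
  std::printf("train loss: %f, vali loss: %f\n", train_loss, vali_loss);
  cudaFree(train_losses);
  cudaFree(vali_losses);
  return 0;
}
```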