@@ -39,7 +39,7 @@ bool CuLDA::Init(std::string opt_path) {
39
39
void CuLDA::LoadModel (float * alpha, float * beta,
40
40
float * grad_alpha, float * new_beta, int num_words) {
41
41
num_words_ = num_words;
42
- DEBUG (" copy model({} x {})" , num_topics_, num_words_ );
42
+ DEBUG (" copy model({} x {})" , num_words_, num_topics_ );
43
43
dev_alpha_.resize (num_topics_);
44
44
dev_beta_.resize (num_topics_ * num_words_);
45
45
thrust::copy (alpha, alpha + num_topics_, dev_alpha_.begin ());
@@ -49,7 +49,7 @@ void CuLDA::LoadModel(float* alpha, float* beta,
49
49
// resize device vector
50
50
grad_alpha_ = grad_alpha;
51
51
new_beta_ = new_beta;
52
- dev_grad_alpha_.resize (block_cnt_ * num_topics_ );
52
+ dev_grad_alpha_.resize (num_topics_ * block_cnt_ );
53
53
dev_new_beta_.resize (num_topics_ * num_words_);
54
54
55
55
// copy to device
@@ -74,15 +74,15 @@ std::pair<float, float> CuLDA::FeedData(
74
74
thrust::copy (cols, cols + num_cols, dev_cols.begin ());
75
75
thrust::copy (indptr, indptr + num_indptr + 1 , dev_indptr.begin ());
76
76
thrust::copy (vali, vali + num_cols, dev_vali.begin ());
77
-
78
77
CHECK_CUDA (cudaDeviceSynchronize ());
78
+ DEBUG0 (" copy feed data to GPU memory" );
79
79
80
80
// run E step in GPU
81
81
EstepKernel<<<block_cnt_, block_dim_>>> (
82
82
thrust::raw_pointer_cast (dev_cols.data ()),
83
83
thrust::raw_pointer_cast (dev_indptr.data ()),
84
84
thrust::raw_pointer_cast (dev_vali.data ()),
85
- num_cols, num_indptr, num_words_, num_topics_, num_iters,
85
+ num_cols, num_indptr, num_topics_, num_iters,
86
86
thrust::raw_pointer_cast (dev_gamma_.data ()),
87
87
thrust::raw_pointer_cast (dev_new_gamma_.data ()),
88
88
thrust::raw_pointer_cast (dev_phi_.data ()),
@@ -92,14 +92,15 @@ std::pair<float, float> CuLDA::FeedData(
92
92
thrust::raw_pointer_cast (dev_new_beta_.data ()),
93
93
thrust::raw_pointer_cast (dev_train_losses.data ()),
94
94
thrust::raw_pointer_cast (dev_vali_losses.data ()));
95
-
96
95
CHECK_CUDA (cudaDeviceSynchronize ());
96
+ DEBUG0 (" run E step in GPU" );
97
97
98
98
// pull loss
99
99
std::vector<float > train_losses (block_cnt_), vali_losses (block_cnt_);
100
100
thrust::copy (dev_train_losses.begin (), dev_train_losses.end (), train_losses.begin ());
101
101
thrust::copy (dev_vali_losses.begin (), dev_vali_losses.end (), vali_losses.begin ());
102
102
CHECK_CUDA (cudaDeviceSynchronize ());
103
+ DEBUG0 (" pull loss values" );
103
104
104
105
// accumulate
105
106
float train_loss = std::accumulate (train_losses.begin (), train_losses.end (), 0 .0f );
0 commit comments