Skip to content

Commit 513e774

Browse files
authored
Merge pull request #2 from js1010/task/implement-lda
Task/implement lda
2 parents 00acb20 + c064f69 commit 513e774

File tree

21 files changed

+938
-147
lines changed

21 files changed

+938
-147
lines changed

cpp/include/culda.hpp

Lines changed: 0 additions & 44 deletions
This file was deleted.
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Copyright (c) 2021 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
#pragma once
7+
#include "utils/cuda_utils_kernels.cuh"
8+
9+
10+
namespace cusim {
11+
12+
// reference: http://web.science.mq.edu.au/~mjohnson/code/digamma.c
// Approximates the digamma function psi(x) = d/dx ln Gamma(x).
// Assumes x > 0 (callers pass positive variational gamma values);
// not valid for x <= 0.
__inline__ __device__
float Digamma(float x) {
  // Recurrence psi(x) = psi(x + 1) - 1/x, applied until x >= 7 where
  // the asymptotic series below is accurate.
  float result = 0.0f;
  while (x < 7.0f) {
    result -= 1.0f / x;
    x += 1.0f;
  }
  // Asymptotic expansion of psi(x + 1/2) in powers of 1/x^2.
  x -= 0.5f;
  const float r = 1.0f / x;
  const float r2 = r * r;
  const float r4 = r2 * r2;
  result += logf(x)
      + (1.0f / 24.0f) * r2
      - (7.0f / 960.0f) * r4
      + (31.0f / 8064.0f) * r4 * r2
      - (127.0f / 30720.0f) * r4 * r4;
  return result;
}
27+
28+
// Variational E-step of LDA over a sparse doc-word matrix in CSR form
// (indptr: num_indptr + 1 document offsets, cols: num_cols word ids,
// vali: per-token validation mask).
// Launch layout: one block handles one document at a time (grid-stride
// over documents); threads in the block cooperate across the num_topics
// dimension. The workspaces gamma / new_gamma / phi / grad_alpha must
// each hold gridDim.x * num_topics floats (one row per block), and
// mutex holds one int per word, zero-initialized, used as a spinlock
// guarding the corresponding row of new_beta across blocks.
// train_losses / vali_losses accumulate per-block log-likelihoods.
__global__ void EstepKernel(
  const int* cols, const int* indptr, const bool* vali,
  const int num_cols, const int num_indptr,
  const int num_topics, const int num_iters,
  float* gamma, float* new_gamma, float* phi,
  const float* alpha, const float* beta,
  float* grad_alpha, float* new_beta,
  float* train_losses, float* vali_losses, int* mutex) {

  // per-block rows of the global scratch buffers
  float* _gamma = gamma + num_topics * blockIdx.x;
  float* _new_gamma = new_gamma + num_topics * blockIdx.x;
  float* _phi = phi + num_topics * blockIdx.x;
  float* _grad_alpha = grad_alpha + num_topics * blockIdx.x;

  for (int i = blockIdx.x; i < num_indptr; i += gridDim.x) {
    int beg = indptr[i], end = indptr[i + 1];

    // initialize gamma_d = alpha + N_d / K
    // (cast to float: the original integer division truncated N_d / K)
    for (int j = threadIdx.x; j < num_topics; j += blockDim.x)
      _gamma[j] = alpha[j] + static_cast<float>(end - beg) / num_topics;
    __syncthreads();

    // iterate E step
    for (int j = 0; j < num_iters; ++j) {
      const bool last_iter = (j + 1 == num_iters);

      // reset the new-gamma accumulator
      for (int k = threadIdx.x; k < num_topics; k += blockDim.x)
        _new_gamma[k] = 0.0f;
      __syncthreads();

      // compute phi from gamma for every token of this document
      for (int k = beg; k < end; ++k) {
        const int w = cols[k];
        const bool _vali = vali[k];

        // validation tokens only contribute on the last iteration;
        // the condition is uniform across the block, so the barriers
        // inside this branch are safe
        if (!_vali || last_iter) {
          // phi_l proportional to beta_wl * exp(digamma(gamma_l))
          for (int l = threadIdx.x; l < num_topics; l += blockDim.x)
            _phi[l] = beta[w * num_topics + l] * expf(Digamma(_gamma[l]));
          __syncthreads();

          // normalize phi; training tokens feed the new gamma
          float phi_sum = ReduceSum(_phi, num_topics);
          for (int l = threadIdx.x; l < num_topics; l += blockDim.x) {
            _phi[l] /= phi_sum;
            if (!_vali) _new_gamma[l] += _phi[l];
          }
          __syncthreads();
        }

        if (last_iter) {
          // take the per-word lock for write access to the w-th row of
          // new_beta; the fence after acquire makes other blocks' prior
          // writes to that row visible (bare atomics carry no ordering)
          if (threadIdx.x == 0) {
            while (atomicCAS(&mutex[w], 0, 1)) {}
            __threadfence();
          }
          __syncthreads();

          for (int l = threadIdx.x; l < num_topics; l += blockDim.x) {
            if (!_vali) new_beta[w * num_topics + l] += _phi[l];
            _phi[l] *= beta[w * num_topics + l];
          }
          __syncthreads();

          // release lock; fence first so our new_beta writes are
          // visible to the next block that acquires it
          if (threadIdx.x == 0) {
            __threadfence();
            atomicExch(&mutex[w], 0);
          }
          __syncthreads();

          // accumulate per-token log-likelihood log sum_l phi_l beta_wl
          float p = fmaxf(EPS, ReduceSum(_phi, num_topics));
          if (threadIdx.x == 0) {
            if (_vali)
              vali_losses[blockIdx.x] += logf(p);
            else
              train_losses[blockIdx.x] += logf(p);
          }
        }
        __syncthreads();
      }

      // update gamma_d = new_gamma + alpha
      for (int k = threadIdx.x; k < num_topics; k += blockDim.x)
        _gamma[k] = _new_gamma[k] + alpha[k];
      __syncthreads();
    }

    // accumulate the alpha gradient contribution of this document:
    // digamma(gamma_dk) - digamma(sum_k gamma_dk)
    float gamma_sum = ReduceSum(_gamma, num_topics);
    for (int j = threadIdx.x; j < num_topics; j += blockDim.x)
      _grad_alpha[j] += (Digamma(_gamma[j]) - Digamma(gamma_sum));

    __syncthreads();
  }
}
120+
121+
} // cusim

cpp/include/culda/culda.hpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// Copyright (c) 2021 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
#pragma once
7+
#include <thrust/copy.h>
8+
#include <thrust/fill.h>
9+
#include <thrust/random.h>
10+
#include <thrust/host_vector.h>
11+
#include <thrust/device_vector.h>
12+
#include <thrust/binary_search.h>
13+
#include <thrust/execution_policy.h>
14+
15+
#include <omp.h>
16+
#include <set>
17+
#include <random>
18+
#include <memory>
19+
#include <string>
20+
#include <fstream>
21+
#include <utility>
22+
#include <queue>
23+
#include <deque>
24+
#include <functional>
25+
#include <vector>
26+
#include <cmath>
27+
#include <chrono> // NOLINT
28+
29+
#include "json11.hpp"
30+
#include "utils/log.hpp"
31+
#include "utils/types.hpp"
32+
33+
namespace cusim {
34+
35+
36+
// reference: https://people.math.sc.edu/Burkardt/cpp_src/asa121/asa121.cpp
// Computes the trigamma function psi'(x) = d^2/dx^2 ln Gamma(x) via
// ASA121: the recurrence psi'(z) = psi'(z+1) + 1/z^2 to shift the
// argument above b, then an asymptotic (Bernoulli) series.
// Assumes x > 0.
inline float Trigamma(float x) {
  const float a = 0.0001f;  // below this, psi'(x) ~ 1/x^2 suffices
  const float b = 5.0f;     // threshold for the asymptotic series
  const float b2 = 0.1666666667f;    // Bernoulli-series coefficients
  const float b4 = -0.03333333333f;
  const float b6 = 0.02380952381f;
  const float b8 = -0.03333333333f;
  if (x <= a) return 1.0f / x / x;
  float value = 0.0f, z = x;
  // shift z upward, accumulating the recurrence terms
  while (z < b) {
    value += 1.0f / z / z;
    z++;
  }
  // asymptotic expansion at z
  // (bug fix: the previous `value += value + ...` added the accumulated
  // recurrence sum twice, doubling the result for all x < b)
  float y = 1.0f / z / z;
  value += 0.5f * y + (1.0f
    + y * (b2
    + y * (b4
    + y * (b6
    + y * b8)))) / z;
  return value;
}
58+
59+
60+
// GPU-backed LDA trainer. Holds device-side copies of the model
// parameters (alpha, beta) and the per-block workspaces used by the
// E-step kernel, plus host pointers into caller-owned arrays.
class CuLDA {
 public:
  CuLDA();
  ~CuLDA();
  // Loads options (JSON) from opt_path; returns false on failure.
  // NOTE(review): exact failure semantics defined in the .cu file —
  // confirm there.
  bool Init(std::string opt_path);
  // Binds caller-owned host buffers for the model parameters and their
  // accumulators; num_words fixes the vocabulary size.
  void LoadModel(float* alpha, float* beta,
    float* grad_alpha, float* new_beta, const int num_words);
  // Runs num_iters E-step iterations over a CSR batch (indices/indptr,
  // with vali marking validation tokens). Returns a pair of losses —
  // presumably (train, validation); verify against the implementation.
  std::pair<float, float> FeedData(
    const int* indices, const int* indptr, const bool* vali,
    const int num_indices, const int num_indptr, const int num_iters);
  // Device <-> host parameter transfer (direction per name; bodies are
  // in the .cu implementation).
  void Pull();
  void Push();
  // Number of CUDA blocks used for kernel launches (block_cnt_).
  int GetBlockCnt();

 private:
  DeviceInfo dev_info_;                 // GPU capability info
  json11::Json opt_;                    // parsed options from Init()
  std::shared_ptr<spdlog::logger> logger_;
  // device-side model parameters and accumulators
  thrust::device_vector<float> dev_alpha_, dev_beta_;
  thrust::device_vector<float> dev_grad_alpha_, dev_new_beta_;
  // per-block E-step workspaces (one num_topics row per block)
  thrust::device_vector<float> dev_gamma_, dev_new_gamma_, dev_phi_;
  // per-word locks guarding new_beta rows in the E-step kernel
  thrust::device_vector<int> dev_mutex_;

  // host-side pointers bound by LoadModel (caller-owned)
  float *alpha_, *beta_, *grad_alpha_, *new_beta_;
  int block_cnt_, block_dim_;           // kernel launch configuration
  int num_topics_, num_words_;
};
87+
88+
} // namespace cusim

cpp/include/types.hpp

Lines changed: 0 additions & 41 deletions
This file was deleted.

0 commit comments

Comments
 (0)