Skip to content

Commit c7df6e4

Browse files
committed
ensure non-negativity
1 parent b97c148 commit c7df6e4

File tree

4 files changed

+32
-8
lines changed

4 files changed

+32
-8
lines changed

cpp/include/culda/cuda_lda_kernels.cuh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ __global__ void EstepKernel(
3030
const int num_cols, const int num_indptr,
3131
const int num_topics, const int num_iters,
3232
float* gamma, float* new_gamma, float* phi,
33-
float* alpha, float* beta,
33+
const float* alpha, const float* beta,
3434
float* grad_alpha, float* new_beta, float* train_losses, float* vali_losses) {
3535

3636
// storage for block
@@ -76,12 +76,12 @@ __global__ void EstepKernel(
7676
__syncthreads();
7777
}
7878
if (j + 1 == num_iters) {
79-
float p = ReduceSum(_phi, num_topics);
79+
float p = fmaxf(EPS, ReduceSum(_phi, num_topics));
8080
if (threadIdx.x == 0) {
8181
if (_vali)
82-
vali_losses[blockIdx.x] += logf(p + EPS);
82+
vali_losses[blockIdx.x] += logf(p);
8383
else
84-
train_losses[blockIdx.x] += logf(p + EPS);
84+
train_losses[blockIdx.x] += logf(p);
8585
}
8686
}
8787
__syncthreads();

cpp/src/utils/ioutils.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,10 @@ void IoUtils::ParseLineImpl(std::string line, std::vector<std::string>& ret) {
3737
int n = line.size();
3838
std::string element;
3939
for (int i = 0; i < n; ++i) {
40-
if (line[i] == ' ' or line[i] == ',') {
40+
if (line[i] == ' ') {
4141
ret.push_back(element);
4242
element.clear();
43-
} else if (line[i] != '"') {
43+
} else {
4444
element += std::tolower(line[i]);
4545
}
4646
}

cusim/culda/pyculda.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ def _train_m_step(self):
137137
self.obj.pull()
138138

139139
# update beta
140+
self.new_beta[:, :] = np.maximum(self.new_beta, EPS)
140141
self.beta[:, :] = self.new_beta / np.sum(self.new_beta, axis=0)[None, :]
141142
self.new_beta[:, :] = 0
142143

@@ -151,6 +152,15 @@ def _train_m_step(self):
151152
c_0 = c_nume / c_deno
152153
delta = (gvec - c_0) / hvec
153154
self.alpha -= delta
155+
self.alpha[:] = np.maximum(self.alpha, EPS)
154156
self.grad_alpha[:,:] = 0
155157

156158
self.obj.push()
159+
160+
def save_model(self, model_path):
161+
self.logger.info("save model path: %s", model_path)
162+
h5f = h5py.File(model_path, "w")
163+
h5f.create_dataset("alpha", data=self.alpha)
164+
h5f.create_dataset("beta", data=self.beta)
165+
h5f.create_dataset("keys", data=np.array(self.words))
166+
h5f.close()

examples/example1.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import subprocess
1010
import fire
1111

12+
import h5py
13+
import numpy as np
1214
from gensim import downloader as api
1315
from cusim import aux, IoUtils, CuLDA
1416

@@ -43,11 +45,23 @@ def run_lda():
4345
"data_path": DATA_PATH,
4446
"data_dir": DATA_PATH2,
4547
# "skip_preprocess": True,
46-
"c_log_level": 3,
48+
# "c_log_level": 3,
4749
}
4850
lda = CuLDA(opt)
4951
lda.train_model()
50-
52+
lda.save_model("res/lda.h5")
53+
h5f = h5py.File("res/lda.h5", "r")
54+
beta = h5f["beta"][:]
55+
for i in range(lda.opt.num_topics):
56+
print("=" * 50)
57+
print(f"topic {i + 1}")
58+
words = np.argsort(-beta.T[i])[:10]
59+
print("-" * 50)
60+
for j in range(10):
61+
word = lda.words[words[j]].decode("utf8")
62+
prob = beta[words[j], i]
63+
print(f"rank {j + 1}. word: {word}, prob: {prob}")
64+
h5f.close()
5165

5266
if __name__ == "__main__":
5367
fire.Fire()

0 commit comments

Comments
 (0)