
Commit 7cf87c9

implement bindings
1 parent 084c4cb commit 7cf87c9

5 files changed: +287, -0 lines changed

cpp/include/cuw2v/cuw2v.hpp

Lines changed: 1 addition & 0 deletions
@@ -59,6 +59,7 @@ class CuW2V {
   int GetBlockCnt();
   std::pair<float, float> FeedData(const int* cols, const int* indptr,
                                    const int num_cols, const int num_indptr);
+  void Pull();

  private:
   DeviceInfo dev_info_;

cpp/src/cuw2v/cuw2v.cu

Lines changed: 6 additions & 0 deletions
@@ -264,4 +264,10 @@ std::pair<float, float> CuW2V::FeedData(const int* cols, const int* indptr,
   return {loss_nume_sum, loss_deno_sum};
 }

+void CuW2V::Pull() {
+  thrust::copy(dev_emb_in_.begin(), dev_emb_in_.end(), emb_in_);
+  thrust::copy(dev_emb_out_.begin(), dev_emb_out_.end(), emb_out_);
+  CHECK_CUDA(cudaDeviceSynchronize());
+}
+
 }  // namespace cusim

cusim/cuw2v/__init__.py

Lines changed: 6 additions & 0 deletions
# Copyright (c) 2021 Jisang Yoon
# All rights reserved.
#
# This source code is licensed under the Apache 2.0 license found in the
# LICENSE file in the root directory of this source tree.
from cusim.cuw2v.pycuw2v import CuW2V

cusim/cuw2v/bindings.cc

Lines changed: 107 additions & 0 deletions
// Copyright (c) 2021 Jisang Yoon
// All rights reserved.
//
// This source code is licensed under the Apache 2.0 license found in the
// LICENSE file in the root directory of this source tree.
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

#include <iostream>
#include "cuw2v/cuw2v.hpp"

namespace py = pybind11;

typedef py::array_t<float, py::array::c_style | py::array::forcecast> float_array;
typedef py::array_t<int, py::array::c_style | py::array::forcecast> int_array;

class CuW2VBind {
 public:
  CuW2VBind() {}

  bool Init(std::string opt_path) {
    return obj_.Init(opt_path);
  }

  void LoadModel(py::object& emb_in, py::object& emb_out) {
    // check shapes of emb_in and emb_out
    float_array _emb_in(emb_in);
    float_array _emb_out(emb_out);
    auto emb_in_buffer = _emb_in.request();
    auto emb_out_buffer = _emb_out.request();
    if (emb_in_buffer.ndim != 2 or emb_out_buffer.ndim != 2 or
        emb_in_buffer.shape[1] != emb_out_buffer.shape[1]) {
      throw std::runtime_error("invalid emb_in or emb_out");
    }

    return obj_.LoadModel(_emb_in.mutable_data(0), _emb_out.mutable_data(0));
  }

  void BuildRandomTable(py::object& word_count, int table_size, int num_threads) {
    float_array _word_count(word_count);
    auto wc_buffer = _word_count.request();
    if (wc_buffer.ndim != 1) {
      throw std::runtime_error("invalid word count");
    }
    int num_words = wc_buffer.shape[0];
    obj_.BuildRandomTable(_word_count.data(0), num_words, table_size, num_threads);
  }

  void BuildHuffmanTree(py::object& word_count) {
    float_array _word_count(word_count);
    auto wc_buffer = _word_count.request();
    if (wc_buffer.ndim != 1) {
      throw std::runtime_error("invalid word count");
    }
    int num_words = wc_buffer.shape[0];
    obj_.BuildHuffmanTree(_word_count.data(0), num_words);
  }

  std::pair<float, float> FeedData(py::object& cols, py::object& indptr) {
    int_array _cols(cols);
    int_array _indptr(indptr);
    auto cols_buffer = _cols.request();
    auto indptr_buffer = _indptr.request();
    if (cols_buffer.ndim != 1 or indptr_buffer.ndim != 1) {
      throw std::runtime_error("invalid cols or indptr");
    }
    int num_cols = cols_buffer.shape[0];
    int num_indptr = indptr_buffer.shape[0] - 1;
    return obj_.FeedData(_cols.data(0), _indptr.data(0), num_cols, num_indptr);
  }

  void Pull() {
    obj_.Pull();
  }

  int GetBlockCnt() {
    return obj_.GetBlockCnt();
  }

 private:
  cusim::CuW2V obj_;
};

PYBIND11_PLUGIN(cuw2v_bind) {
  py::module m("CuW2VBind");

  py::class_<CuW2VBind>(m, "CuW2VBind")
    .def(py::init())
    .def("init", &CuW2VBind::Init, py::arg("opt_path"))
    .def("load_model", &CuW2VBind::LoadModel,
         py::arg("emb_in"), py::arg("emb_out"))
    .def("feed_data", &CuW2VBind::FeedData,
         py::arg("cols"), py::arg("indptr"))
    .def("pull", &CuW2VBind::Pull)
    .def("build_random_table", &CuW2VBind::BuildRandomTable,
         py::arg("word_count"), py::arg("table_size"), py::arg("num_threads"))
    .def("build_huffman_tree", &CuW2VBind::BuildHuffmanTree,
         py::arg("word_count"))
    .def("get_block_cnt", &CuW2VBind::GetBlockCnt)
    .def("__repr__",
         [](const CuW2VBind &a) {
           return "<CuW2VBind>";
         }
    );
  return m.ptr();
}
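A minimal sketch of driving the raw binding from Python, inferred from the method signatures above. The import path assumes the extension is built as cusim/cuw2v/cuw2v_bind (by analogy with culda_bind), the option-file path and array sizes are placeholders, and the call order plus the assumption that load_model registers the host buffers that pull() later writes back into follow from the Pull implementation shown earlier, not from documentation.

# hypothetical usage sketch for the cuw2v_bind extension
import numpy as np
from cusim.cuw2v.cuw2v_bind import CuW2VBind  # assumed module path for PYBIND11_PLUGIN(cuw2v_bind)

num_words, dim = 10000, 100
word_count = np.random.randint(1, 1000, size=num_words).astype(np.float32)
emb_in = np.random.uniform(-0.5 / dim, 0.5 / dim,
                           size=(num_words, dim)).astype(np.float32)
emb_out = np.zeros((num_words, dim), dtype=np.float32)

obj = CuW2VBind()
assert obj.init(b"/path/to/opt.json")   # placeholder; JSON options consumed by CuW2V::Init
obj.load_model(emb_in, emb_out)         # both arrays must be 2-d with the same width
obj.build_huffman_tree(word_count)      # or build_random_table(word_count, table_size, num_threads)

# cols/indptr describe a batch of sentences in CSR form:
# sentence i is cols[indptr[i]:indptr[i + 1]]
cols = np.array([0, 1, 2, 2, 3, 4], dtype=np.int32)
indptr = np.array([0, 3, 6], dtype=np.int32)
loss_nume, loss_deno = obj.feed_data(cols, indptr)

obj.pull()  # presumably copies the trained GPU embeddings back into emb_in / emb_out
print(loss_nume / (loss_deno + 1e-10))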

cusim/cuw2v/pycuw2v.py

Lines changed: 167 additions & 0 deletions
# Copyright (c) 2021 Jisang Yoon
# All rights reserved.
#
# This source code is licensed under the Apache 2.0 license found in the
# LICENSE file in the root directory of this source tree.

# pylint: disable=no-name-in-module,too-few-public-methods,no-member
import os
from os.path import join as pjoin

import json
import tempfile

import h5py
import numpy as np
from scipy.special import polygamma as pg

from cusim import aux, IoUtils
from cusim.culda.culda_bind import CuLDABind
from cusim.config_pb2 import CuLDAConfigProto

EPS = 1e-10

class CuLDA:
  def __init__(self, opt=None):
    self.opt = aux.get_opt_as_proto(opt or {}, CuLDAConfigProto)
    self.logger = aux.get_logger("culda", level=self.opt.py_log_level)

    tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
    opt_content = json.dumps(aux.proto_to_dict(self.opt), indent=2)
    tmp.write(opt_content)
    tmp.close()

    self.logger.info("opt: %s", opt_content)
    self.obj = CuLDABind()
    assert self.obj.init(bytes(tmp.name, "utf8")), f"failed to load {tmp.name}"
    os.remove(tmp.name)

    self.words, self.num_words, self.num_docs = None, None, None
    self.alpha, self.beta, self.grad_alpha, self.new_beta = \
      None, None, None, None

  def preprocess_data(self):
    if self.opt.skip_preprocess:
      return
    iou = IoUtils()
    if not self.opt.processed_data_dir:
      self.opt.processed_data_dir = tempfile.TemporaryDirectory().name
    iou.convert_stream_to_h5(self.opt.data_path, self.opt.word_min_count,
                             self.opt.processed_data_dir)

  def init_model(self):
    # load vocabulary
    data_dir = self.opt.processed_data_dir
    self.logger.info("load key from %s", pjoin(data_dir, "keys.txt"))
    with open(pjoin(data_dir, "keys.txt"), "rb") as fin:
      self.words = [line.strip() for line in fin]
    self.num_words = len(self.words)

    # count number of docs
    h5f = h5py.File(pjoin(data_dir, "token.h5"), "r")
    self.num_docs = h5f["indptr"].shape[0] - 1
    h5f.close()

    self.logger.info("number of words: %d, docs: %d",
                     self.num_words, self.num_docs)

    # randomly initialize alpha and beta
    np.random.seed(self.opt.seed)
    self.alpha = np.random.uniform( \
      size=(self.opt.num_topics,)).astype(np.float32)
    self.beta = np.random.uniform( \
      size=(self.num_words, self.opt.num_topics)).astype(np.float32)
    self.beta /= np.sum(self.beta, axis=0)[None, :]
    self.logger.info("alpha %s, beta %s initialized",
                     self.alpha.shape, self.beta.shape)

    # zero-initialize grad_alpha and new_beta
    block_cnt = self.obj.get_block_cnt()
    self.grad_alpha = np.zeros(shape=(block_cnt, self.opt.num_topics),
                               dtype=np.float32)
    self.new_beta = np.zeros(shape=self.beta.shape, dtype=np.float32)
    self.logger.info("grad alpha %s, new beta %s initialized",
                     self.grad_alpha.shape, self.new_beta.shape)

    # push it to gpu
    self.obj.load_model(self.alpha, self.beta, self.grad_alpha, self.new_beta)

  def train_model(self):
    self.preprocess_data()
    self.init_model()
    h5f = h5py.File(pjoin(self.opt.processed_data_dir, "token.h5"), "r")
    for epoch in range(1, self.opt.epochs + 1):
      self.logger.info("Epoch %d / %d", epoch, self.opt.epochs)
      self._train_e_step(h5f)
      self._train_m_step()
    h5f.close()

  def _train_e_step(self, h5f):
    offset, size = 0, h5f["cols"].shape[0]
    pbar = aux.Progbar(size, stateful_metrics=["train_loss", "vali_loss"])
    train_loss_nume, train_loss_deno = 0, 0
    vali_loss_nume, vali_loss_deno = 0, 0
    while True:
      target = h5f["indptr"][offset] + self.opt.batch_size
      if target < size:
        next_offset = h5f["rows"][target]
      else:
        next_offset = h5f["indptr"].shape[0] - 1
      indptr = h5f["indptr"][offset:next_offset + 1]
      beg, end = indptr[0], indptr[-1]
      indptr -= beg
      cols = h5f["cols"][beg:end]
      vali = (h5f["vali"][beg:end] < self.opt.vali_p).astype(np.bool)
      offset = next_offset

      # call cuda kernel
      train_loss, vali_loss = \
        self.obj.feed_data(cols, indptr, vali, self.opt.num_iters_in_e_step)

      # accumulate loss
      train_loss_nume -= train_loss
      vali_loss_nume -= vali_loss
      vali_cnt = np.count_nonzero(vali)
      train_cnt = len(vali) - vali_cnt
      train_loss_deno += train_cnt
      vali_loss_deno += vali_cnt
      train_loss = train_loss_nume / (train_loss_deno + EPS)
      vali_loss = vali_loss_nume / (vali_loss_deno + EPS)

      # update progress bar
      pbar.update(end, values=[("train_loss", train_loss),
                               ("vali_loss", vali_loss)])
      if end == size:
        break

  def _train_m_step(self):
    self.obj.pull()

    # update beta
    self.new_beta[:, :] = np.maximum(self.new_beta, EPS)
    self.beta[:, :] = self.new_beta / np.sum(self.new_beta, axis=0)[None, :]
    self.new_beta[:, :] = 0

    # update alpha
    alpha_sum = np.sum(self.alpha)
    gvec = np.sum(self.grad_alpha, axis=0)
    gvec += self.num_docs * (pg(0, alpha_sum) - pg(0, self.alpha))
    hvec = self.num_docs * pg(1, self.alpha)
    z_0 = pg(1, alpha_sum)
    c_nume = np.sum(gvec / hvec)
    c_deno = 1 / z_0 + np.sum(1 / hvec)
    c_0 = c_nume / c_deno
    delta = (gvec - c_0) / hvec
    self.alpha -= delta
    self.alpha[:] = np.maximum(self.alpha, EPS)
    self.grad_alpha[:, :] = 0

    self.obj.push()

  def save_model(self, model_path):
    self.logger.info("save model path: %s", model_path)
    h5f = h5py.File(model_path, "w")
    h5f.create_dataset("alpha", data=self.alpha)
    h5f.create_dataset("beta", data=self.beta)
    h5f.create_dataset("keys", data=np.array(self.words))
    h5f.close()
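For reference, the alpha update in _train_m_step is a Newton step for the Dirichlet prior in which the Hessian is a diagonal term plus a rank-one term, so it can be inverted in closed form. Reading D as the number of documents, psi as the digamma function and psi' as the trigamma function, the quantities gvec, hvec, z_0, c_0 and delta computed above correspond to:

g_k = \sum_b \mathrm{grad\_alpha}_{b,k} + D\,\bigl(\psi(\textstyle\sum_j \alpha_j) - \psi(\alpha_k)\bigr),
\qquad h_k = D\,\psi'(\alpha_k),
\qquad z = \psi'(\textstyle\sum_j \alpha_j)

c = \frac{\sum_j g_j / h_j}{1/z + \sum_j 1/h_j},
\qquad \alpha_k \leftarrow \max\!\Bigl(\alpha_k - \frac{g_k - c}{h_k},\ \varepsilon\Bigr)

The sign with which the CUDA kernel accumulates grad_alpha determines whether the step is written as a subtraction, as it is here, so these formulas are best read as a restatement of the code rather than a derivation.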

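A sketch of how the wrapper class above is typically driven end to end. Every option key below appears as an self.opt field read by the wrapper; the exact names, defaults and any additional fields live in CuLDAConfigProto, so treat the values here as placeholders rather than recommended settings.

# hypothetical driver for the wrapper above
from cusim.cuw2v.pycuw2v import CuLDA

opt = {
  "data_path": "data/corpus.txt",          # raw text stream fed to IoUtils.convert_stream_to_h5
  "processed_data_dir": "data/processed",  # keys.txt and token.h5 are written here
  "word_min_count": 5,
  "num_topics": 50,
  "epochs": 10,
  "batch_size": 100000,
  "vali_p": 0.2,                           # fraction of tokens held out for the validation loss
}

lda = CuLDA(opt)
lda.train_model()             # preprocess_data -> init_model -> E/M steps per epoch
lda.save_model("model.h5")    # stores alpha, beta and the vocabulary keys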