
Commit 30af2c7

add files for pypi
1 parent 1db5a46 commit 30af2c7


5 files changed: +72 −20 lines


MANIFEST.in

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+include cuda_setup.py
+include requirements.txt
+include pyproject.toml
+recursive-include cpp/src/cuw2v/ *.cu
+recursive-include cpp/src/culda/ *.cu
+recursive-include cpp/src/ioutils/ *.cc
+recursive-include cpp/include/cuw2v/ *.cuh
+recursive-include cpp/include/cuw2v/ *.hpp
+recursive-include cpp/include/culda/ *.cuh
+recursive-include cpp/include/culda/ *.hpp
+recursive-include cpp/include/ioutils/ *.cuh
+recursive-include cpp/include/ioutils/ *.hpp
+recursive-include 3rd/json11/ *
+recursive-include 3rd/spdlog/ *
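The manifest pulls the CUDA/C++ sources and the vendored json11/spdlog trees into the source distribution so the extension can be compiled at install time. A quick way to confirm the declared files actually land in the sdist (a minimal sketch; the archive name is hypothetical and assumes `python setup.py sdist` has already been run):

# Minimal sketch: list an sdist's contents to confirm MANIFEST.in took
# effect. Assumes the sdist was built into dist/; the archive name below
# is hypothetical.
import tarfile

with tarfile.open("dist/cusim-0.0.1.tar.gz", "r:gz") as sdist:
    names = sdist.getnames()

# Spot-check a few entries declared in MANIFEST.in.
for expected in ("cuda_setup.py", "requirements.txt", "pyproject.toml"):
    assert any(n.endswith(expected) for n in names), f"missing {expected}"

print("\n".join(sorted(names)))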

README.md

Lines changed: 10 additions & 4 deletions
@@ -29,10 +29,10 @@ python setup.py install
 
 ### Performance
 
-- [AWS P3 2xlarge instance](https://aws.amazon.com/ec2/instance-types/p3/) is used to the experiment. (One Tesla V100 GPU with 8 vcpus)
-- results can be reproduced by running `examples/example_w2v.py` and `examples/example_lda.py`
-- To evaluate w2v model, we used `evaluate_word_pairs` function ([ref link](https://radimrehurek.com/gensim/auto_examples/tutorials/run_word2vec.html#evaluating)) in gensim, note that better performance on WS-353 test set does not mean that the model will workbetter in application as desribed on the link. However, it is good to be measured quantitively and fast training time will be at least very objective measure of performaance.
-- I trained W2V model on quora-duplicat-questions dataset from gensim downloader api with cusim and the performance with gensim.
+- An [AWS g4dn 2xlarge instance](https://aws.amazon.com/ec2/instance-types/g4/) is used for the experiments (one NVIDIA T4 GPU, 8 vCPUs, Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz).
+- Results can be reproduced by simply running `examples/example_w2v.py` and `examples/example_lda.py`.
+- To evaluate the w2v model, we used the `evaluate_word_pairs` function ([ref link](https://radimrehurek.com/gensim/auto_examples/tutorials/run_word2vec.html#evaluating)) in gensim. Note that better performance on the WS-353 test set does not necessarily mean the model will work better in applications, as described at the link. However, it is a useful quantitative measure, and fast training time is at least a very objective measure of performance.
+- I trained a W2V model on the `quora-duplicate-questions` dataset from the gensim downloader API on GPU with cusim and compared the performance (both speed and model quality) with gensim.
 - To evaluate the LDA model, I think there is no good way to measure the quality of training results quantitatively. But we can check the model by looking at the top words of each topic. Also, we can compare the training time here.
 - W2V (CBOW, negative sampling)
 
@@ -42,6 +42,12 @@ python setup.py install
 | pearson | 0.203882 | 0.207705 | 0.221758 | 0.198408 | **0.331749** |
 | spearman | 0.25208 | 0.254706 | 0.275231 | 0.238611 | **0.295346** |
 
+
+- LDA (`nytimes` dataset from https://archive.ics.uci.edu/ml/datasets/bag+of+words)
+  - I found that setting the `workers` variable in gensim's LdaMulticore does not work properly (it uses all cores on the instance anyway), so I just compared the speed of cusim on a single GPU against gensim on 8 vCPUs.
+  - One can compare the quality of the modeling by looking at `examples/cusim.topics.txt` and `examples/gensim.topics.txt`.
+
+
 ### Future tasks
 
 - support half precision
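For context, the `evaluate_word_pairs` call described in the bullets above is gensim's standard word-similarity benchmark. A minimal sketch of how it is used (the model path is hypothetical; `wordsim353.tsv` ships with gensim's test data):

# Minimal sketch of the WS-353 evaluation described above.
from gensim.models import Word2Vec
from gensim.test.utils import datapath

model = Word2Vec.load("w2v.model")  # hypothetical model path
pearson, spearman, oov_ratio = model.wv.evaluate_word_pairs(
    datapath("wordsim353.tsv"))
# pearson/spearman are (correlation, p-value) pairs; the README tables
# report the correlations, i.e. pearson[0] and spearman[0].
print(pearson[0], spearman[0], oov_ratio)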

examples/example_lda.py

Lines changed: 17 additions & 6 deletions
@@ -17,6 +17,7 @@
 import wget
 import h5py
 import numpy as np
+import pandas as pd
 
 # import gensim
 from gensim.models.ldamulticore import LdaMulticore
@@ -77,11 +78,12 @@ def run_cusim():
   start = time.time()
   lda = CuLDA(opt)
   lda.train_model()
-  LOGGER.info("elapsed for training LDA using cusim: %.4e sec",
-              time.time() - start)
+  el0 = time.time() - start
+  LOGGER.info("elapsed for training LDA using cusim: %.4e sec", el0)
   h5_model_path = pjoin(DIR_PATH, "cusim.lda.model.h5")
   lda.save_h5_model(h5_model_path)
   show_cusim_topics(h5_model_path)
+  return el0
 
 def show_cusim_topics(h5_model_path, topk=10):
   h5f = h5py.File(h5_model_path, "r")
@@ -129,15 +131,15 @@ def run_gensim():
     id2word[idx] = line.strip()
 
   start = time.time()
-  # 3 = real cores - 1
   lda = LdaMulticore(docs, num_topics=50, workers=None,
-                    id2word=id2word, iterations=10)
-  LOGGER.info("elapsed for training lda using gensim: %.4e sec",
-              time.time() - start)
+                     id2word=id2word, iterations=10)
+  el0 = time.time() - start
+  LOGGER.info("elapsed for training lda using gensim: %.4e sec", el0)
   model_path = pjoin(DIR_PATH, "gensim.lda.model")
   LOGGER.info("save gensim lda model to %s", model_path)
   lda.save(model_path)
   show_gensim_topics(model_path)
+  return el0
 
 def show_gensim_topics(model_path=None, topk=10):
   # load beta
@@ -174,5 +176,14 @@ def show_topics(beta, keys, topk, result_path):
   fout.close()
 
 
+def run_experiments():
+  training_time = {"attr": "training time (sec)"}
+  training_time["gensim (8 vcpus)"] = run_gensim()
+  training_time["cusim"] = run_cusim()
+  df0 = pd.DataFrame([training_time])
+  df0.set_index("attr", inplace=True)
+  print(df0.to_markdown())
+
+
 if __name__ == "__main__":
   fire.Fire()
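Note that `DataFrame.to_markdown()` delegates to the `tabulate` package, which must be installed separately. A minimal sketch of the table the new `run_experiments` prints (the numbers here are illustrative, not measured results):

# Illustrative only: shape of the markdown table run_experiments emits.
import pandas as pd  # to_markdown() additionally requires `tabulate`

training_time = {"attr": "training time (sec)",
                 "gensim (8 vcpus)": 447.4,  # illustrative numbers,
                 "cusim": 76.2}              # not measured results
df0 = pd.DataFrame([training_time])
df0.set_index("attr", inplace=True)
print(df0.to_markdown())  # a GitHub-flavored table, ready to paste into README.md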

examples/example_w2v.py

Lines changed: 25 additions & 10 deletions
@@ -126,23 +126,38 @@ def evaluate_w2v_model(model=GENSIM_MODEL):
   LOGGER.info("evaluation results: %s", results)
   return results
 
-def run_experiments(sg0=False, hs0=False):
-  training_time = {"attr": "training_time"}
+# the gpu_model argument is only used for display in the markdown table;
+# please pass the actual GPU model name
+def run_experiments(skip_gram=False, hierarchical_softmax=False,
+                    gpu_model="NVIDIA T4"):
+  training_time = {"attr": "training time (sec)"}
   pearson = {"attr": "pearson"}
   spearman = {"attr": "spearman"}
   for i in [1, 2, 4, 8]:
-    elapsed, evals = run_gensim(sg0, hs0, i)
-    training_time[f"{i} workers"] = elapsed
-    pearson[f"{i} workers"] = evals[0][0]
-    spearman[f"{i} workers"] = evals[1][0]
-  elapsed, evals = run_cusim(sg0, hs0)
-  training_time["GPU"] = elapsed
-  pearson["GPU"] = evals[0][0]
-  spearman["GPU"] = evals[1][0]
+    elapsed, evals = run_gensim(skip_gram, hierarchical_softmax, i)
+    training_time[f"{i} workers (gensim)"] = elapsed
+    pearson[f"{i} workers (gensim)"] = evals[0][0]
+    spearman[f"{i} workers (gensim)"] = evals[1][0]
+  elapsed, evals = run_cusim(skip_gram, hierarchical_softmax)
+  gpu_title = f"{gpu_model} (cusim)"
+  training_time[gpu_title] = elapsed
+  pearson[gpu_title] = evals[0][0]
+  spearman[gpu_title] = evals[1][0]
   df0 = pd.DataFrame([training_time, pearson, spearman])
   df0.set_index("attr", inplace=True)
   print(df0.to_markdown())
 
+# the gpu_model argument is only used for display in the markdown table;
+# please pass the actual GPU model name
+def run_various_experiments(gpu_model="NVIDIA T4"):
+  for sg0 in [True, False]:
+    for hs0 in [True, False]:
+      print("=" * 100)
+      LOGGER.info("setting: %s, %s",
+                  "skip gram" if sg0 else "cbow",
+                  "hierarchical softmax" if hs0 else "negative sampling")
+      run_experiments(sg0, hs0, gpu_model)
+
 
 if __name__ == "__main__":
   fire.Fire()
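Both example scripts hand control to python-fire, which turns every top-level function into a CLI subcommand. A self-contained sketch of that dispatch pattern (the toy function is hypothetical; the commented invocations mirror how the real experiment functions are meant to be run):

# Self-contained sketch of the python-fire dispatch both examples use.
import fire

def greet(name="world", shout=False):
    """Toy stand-in for run_experiments()/run_various_experiments()."""
    msg = f"hello, {name}"
    return msg.upper() if shout else msg

if __name__ == "__main__":
    # python this_script.py greet --name=cusim --shout
    # analogously, for the real script:
    #   python examples/example_w2v.py run_various_experiments --gpu_model="NVIDIA T4"
    fire.Fire()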

pyproject.toml

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+[build-system]
+requires = [
+    "setuptools>=1.3.2",
+    "numpy",
+    "pybind11"
+]
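These build requirements exist because pip (per PEP 518) installs everything in `[build-system].requires` into an isolated environment before running the build; numpy and pybind11 are listed because their headers are needed to compile the C++/CUDA extension. A minimal sketch of how such headers are typically consumed (not this repo's actual setup.py; the extension and source names are hypothetical):

# Minimal sketch (hypothetical, not this repo's setup.py): why numpy and
# pybind11 appear in [build-system].requires; their include directories
# are needed when compiling the extension.
import numpy as np
import pybind11
from setuptools import Extension

ext = Extension(
    "cusim_backend",                     # hypothetical extension name
    sources=["cpp/src/bindings.cc"],     # hypothetical source file
    include_dirs=[np.get_include(), pybind11.get_include()],
    language="c++",
)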
