Skip to content

Commit d90f514

Browse files
authored
Merge pull request #12 from js1010/add/travis-ci
Add/travis ci
2 parents 3c5bf8b + d45fd30 commit d90f514

File tree

5 files changed

+140
-41
lines changed

5 files changed

+140
-41
lines changed

.travis.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Travis CI configuration.
# Installs Python 3.6 and the CUDA toolkit packages selected by the job's env
# variables, generates the protobuf Python module, then installs the package
# with setup.py.
language: cpp

# `required` is the documented spelling for this key (was `enabled`).
sudo: required

compiler:
  - gcc

matrix:
  include:
    - name: CUDA 10
      env:
        # CUDA: full version of the NVIDIA repo package;
        # CUDA_SHORT: major.minor, used for apt package names and the install dir.
        - CUDA=10.1.105-1
        - CUDA_SHORT=10.1
        - UBUNTU_VERSION=ubuntu1804
      dist: bionic

before_install:
  - sudo apt update
  - sudo apt install -y software-properties-common
  - sudo add-apt-repository -y ppa:deadsnakes/ppa
  - sudo apt update
  - sudo apt install -y python3-pip python3.6 g++
  - pip3 install -U pip
  - pip3 install setuptools
  - pip3 install -r requirements.txt
  - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
  # Fetch over HTTPS: this package is installed as root by dpkg on the next line.
  - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}
  - sudo dpkg -i ${INSTALLER}
  - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub
  - sudo apt-key add 7fa2af80.pub
  - sudo apt update -qq
  # ${CUDA_SHORT/./-} rewrites "10.1" to "10-1" to match the apt package names.
  - sudo apt install -y cuda-core-${CUDA_SHORT/./-} cuda-cudart-dev-${CUDA_SHORT/./-} cuda-curand-dev-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-}
  - sudo apt clean
  - export CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
  - export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
  - export PATH=${CUDA_HOME}/bin:${PATH}
  - python3.6 -m grpc_tools.protoc --python_out cusim/ --proto_path cusim/proto/ config.proto

script:
  - sudo python3.6 setup.py install

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# CUSIM
2+
3+
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
4+
15
### Introduction
26

37
This project aims to speed up various ML models (e.g. topic modeling, word embedding, etc.) using CUDA. You can think of it as a GPU version of [gensim](https://github.com/RaRe-Technologies/gensim). As a starting step, I implemented the most widely used word embedding model, the [word2vec](https://arxiv.org/pdf/1301.3781.pdf) model, and the most representative topic model, the [LDA (Latent Dirichlet Allocation)](https://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf) model.

cpp/include/utils/cuda_utils_kernels.cuh

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
// LICENSE file in the root directory of this source tree.
66
#pragma once
77
#include <unistd.h>
8-
#include <cublas_v2.h>
8+
// #include <cublas_v2.h>
99
#include <cuda_runtime.h>
1010

1111
#include <thrust/copy.h>
@@ -39,29 +39,29 @@ inline void checkCuda(cudaError_t code, const char *file, int line) {
3939
}
4040
}
4141

42-
inline const char* cublasGetErrorString(cublasStatus_t status) {
43-
switch (status) {
44-
case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS";
45-
case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED";
46-
case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED";
47-
case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE";
48-
case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH";
49-
case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR";
50-
case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
51-
case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR";
52-
}
53-
return "Unknown";
54-
}
55-
56-
#define CHECK_CUBLAS(code) { checkCublas((code), __FILE__, __LINE__); }
57-
inline void checkCublas(cublasStatus_t code, const char * file, int line) {
58-
if (code != CUBLAS_STATUS_SUCCESS) {
59-
std::stringstream err;
60-
err << "cublas error: " << cublasGetErrorString(code)
61-
<< " (" << file << ":" << line << ")";
62-
throw std::runtime_error(err.str());
63-
}
64-
}
42+
// inline const char* cublasGetErrorString(cublasStatus_t status) {
43+
// switch (status) {
44+
// case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS";
45+
// case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED";
46+
// case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED";
47+
// case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE";
48+
// case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH";
49+
// case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR";
50+
// case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
51+
// case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR";
52+
// }
53+
// return "Unknown";
54+
// }
55+
//
56+
// #define CHECK_CUBLAS(code) { checkCublas((code), __FILE__, __LINE__); }
57+
// inline void checkCublas(cublasStatus_t code, const char * file, int line) {
58+
// if (code != CUBLAS_STATUS_SUCCESS) {
59+
// std::stringstream err;
60+
// err << "cublas error: " << cublasGetErrorString(code)
61+
// << " (" << file << ":" << line << ")";
62+
// throw std::runtime_error(err.str());
63+
// }
64+
// }
6565

6666
inline DeviceInfo GetDeviceInfo() {
6767
DeviceInfo ret;

cuda_setup.py

Lines changed: 70 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,60 @@ def find_in_path(name, path):
2828
return None
2929

3030

31+
def get_cuda_sm_list(cuda_ver):
  """Return the SM architectures to emit `-gencode ...,code=sm_XX` flags for.

  Args:
    cuda_ver: toolkit version encoded as `10 * major + minor`
      (e.g. 101 for CUDA 10.1, 110 for CUDA 11.0).

  Returns:
    A list of SM version strings such as `["52", "60", "61"]`.

  If the `CUDA_SM_LIST` environment variable is set, its comma-separated
  entries are returned; surrounding whitespace is stripped and empty entries
  (e.g. from a trailing comma) are dropped so they cannot produce malformed
  nvcc flags. Otherwise a built-in default list is filtered by `cuda_ver`.

  NOTE(review): the original indentation was not recoverable; this assumes
  the version filter applies only to the built-in default list and that a
  user-supplied list is used as given -- confirm against the repository.
  """
  if "CUDA_SM_LIST" in os.environ:
    entries = (sm.strip() for sm in os.environ["CUDA_SM_LIST"].split(","))
    return [sm for sm in entries if sm]

  default_sms = ["30", "52", "60", "61", "70", "75", "80", "86"]
  excluded = set()
  if cuda_ver >= 110:
    # 11.0 and later exclude sm_30; exactly 11.0 also excludes sm_86.
    excluded.add("30")
    if cuda_ver == 110:
      excluded.add("86")
  else:
    # Before 11.0, sm_80/sm_86 are always excluded; older toolkits
    # progressively exclude sm_75, sm_70 and sm_60/sm_61 as well.
    excluded.update(("80", "86"))
    if cuda_ver < 100:
      excluded.add("75")
    if cuda_ver < 90:
      excluded.add("70")
    if cuda_ver < 80:
      excluded.update(("60", "61"))
  return [sm for sm in default_sms if sm not in excluded]
50+
51+
52+
def get_cuda_compute(cuda_ver):
  """Return the compute capability used for the `code=compute_XX` gencode flag.

  Args:
    cuda_ver: toolkit version encoded as `10 * major + minor`
      (e.g. 101 for CUDA 10.1, 110 for CUDA 11.0).

  Returns:
    The compute capability as a string, e.g. `"75"`. The `CUDA_COMPUTE`
    environment variable, when set, overrides the version-based choice.

  Raises:
    ValueError: if `cuda_ver` is below 70 and no override is set.
  """
  if "CUDA_COMPUTE" in os.environ:
    return os.environ["CUDA_COMPUTE"]
  if cuda_ver < 70:
    raise ValueError(f"unsupported cuda version code: {cuda_ver}")
  if cuda_ver < 80:
    return "52"
  if cuda_ver < 90:
    return "61"
  if cuda_ver < 100:
    return "70"
  if cuda_ver < 110:
    return "75"
  if cuda_ver == 110:
    return "80"
  # 11.1 and anything newer: previously versions above 11.1 matched no branch
  # and the function failed with UnboundLocalError.
  return "86"
69+
70+
71+
def get_cuda_arch(cuda_ver):
  """Return the SM version used for nvcc's `-arch=sm_XX` flag.

  Args:
    cuda_ver: toolkit version encoded as `10 * major + minor`
      (e.g. 101 for CUDA 10.1, 110 for CUDA 11.0).

  Returns:
    The SM version as a string, e.g. `"50"`. The `CUDA_ARCH` environment
    variable, when set, overrides the version-based choice.

  Raises:
    ValueError: if `cuda_ver` is below 70 and no override is set.
  """
  if "CUDA_ARCH" in os.environ:
    return os.environ["CUDA_ARCH"]
  if cuda_ver < 70:
    raise ValueError(f"unsupported cuda version code: {cuda_ver}")
  if cuda_ver < 92:
    return "30"
  if cuda_ver < 110:
    return "50"
  if cuda_ver == 110:
    return "52"
  # 11.1 and anything newer: previously versions above 11.1 matched no branch
  # and the function failed with UnboundLocalError.
  return "80"
84+
3185
def locate_cuda():
3286
"""Locate the CUDA environment on the system
3387
If a valid cuda installation is found
@@ -60,22 +114,23 @@ def locate_cuda():
60114
'your path, or set $CUDA_HOME to enable CUDA extensions')
61115
return None
62116
home = os.path.dirname(os.path.dirname(nvcc))
63-
64117
cudaconfig = {'home': home,
65-
'nvcc': nvcc,
66-
'include': os.path.join(home, 'include'),
67-
'lib64': os.path.join(home, 'lib64')}
68-
post_args = [
69-
"-arch=sm_52",
70-
"-gencode=arch=compute_52,code=sm_52",
71-
"-gencode=arch=compute_60,code=sm_60",
72-
"-gencode=arch=compute_61,code=sm_61",
73-
"-gencode=arch=compute_70,code=sm_70",
74-
"-gencode=arch=compute_75,code=sm_75",
75-
"-gencode=arch=compute_80,code=sm_80",
76-
"-gencode=arch=compute_86,code=sm_86",
77-
"-gencode=arch=compute_86,code=compute_86",
78-
'--ptxas-options=-v', '-O2']
118+
'nvcc': nvcc,
119+
'include': os.path.join(home, 'include'),
120+
'lib64': os.path.join(home, 'lib64')}
121+
cuda_ver = os.path.basename(os.path.realpath(home)).split("-")[1].split(".")
122+
major, minor = int(cuda_ver[0]), int(cuda_ver[1])
123+
cuda_ver = 10 * major + minor
124+
assert cuda_ver >= 70, f"too low cuda ver {major}.{minor}"
125+
print(f"cuda_ver: {major}.{minor}")
126+
arch = get_cuda_arch(cuda_ver)
127+
sm_list = get_cuda_sm_list(cuda_ver)
128+
compute = get_cuda_compute(cuda_ver)
129+
post_args = [f"-arch=sm_{arch}"] + \
130+
[f"-gencode=arch=compute_{sm},code=sm_{sm}" for sm in sm_list] + \
131+
[f"-gencode=arch=compute_{compute},code=compute_{compute}",
132+
"--ptxas-options=-v", "-O2"]
133+
print(f"nvcc post args: {post_args}")
79134
if HALF_PRECISION:
80135
post_args = [flag for flag in post_args if "52" not in flag]
81136

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def __init__(self, name):
9292
extra_compile_args=extra_compile_args,
9393
extra_link_args=["-fopenmp"],
9494
library_dirs=[CUDA['lib64']],
95-
libraries=['cudart', 'cublas', 'curand'],
95+
libraries=['cudart', 'curand'],
9696
extra_objects=[],
9797
include_dirs=[ \
9898
"cpp/include/", np.get_include(), pybind11.get_include(),
@@ -107,7 +107,7 @@ def __init__(self, name):
107107
extra_compile_args=extra_compile_args,
108108
extra_link_args=["-fopenmp"],
109109
library_dirs=[CUDA['lib64']],
110-
libraries=['cudart', 'cublas', 'curand'],
110+
libraries=['cudart', 'curand'],
111111
extra_objects=[],
112112
include_dirs=[ \
113113
"cpp/include/", np.get_include(), pybind11.get_include(),

0 commit comments

Comments
 (0)