Skip to content

Commit 78bf148

Browse files
authored
chore: added kernel_only option for fast build (#505)
1 parent 804bfca commit 78bf148

File tree

16 files changed

+271
-171
lines changed

16 files changed

+271
-171
lines changed

.ci/docker/common/install_cuda.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ else
1212
arch_path='sbsa'
1313
fi
1414

15-
NVSHMEM_VERSION=3.3.24
15+
NVSHMEM_VERSION=3.4.5
1616

1717
function install_cuda {
1818
version=$1
@@ -178,10 +178,10 @@ function install_130 {
178178
NCCL_VERSION=v2.27.7-1
179179
CUSPARSELT_VERSION=0.8.0.4_cuda13
180180

181-
echo "Installing CUDA 12.8.1, cuDNN ${CUDNN_VERSION}, NCCL ${NCCL_VERSION}, NVSHMEM ${NVSHMEM_VERSION} and cuSparseLt ${CUSPARSELT_VERSION}"
181+
echo "Installing CUDA 13.0.2, cuDNN ${CUDNN_VERSION}, NCCL ${NCCL_VERSION}, NVSHMEM ${NVSHMEM_VERSION} and cuSparseLt ${CUSPARSELT_VERSION}"
182182

183183
# install CUDA 13.0 in the same container
184-
install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
184+
install_cuda 13.0.2 cuda_13.0.2_580.95.05_linux
185185

186186
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
187187
install_cudnn 13 $CUDNN_VERSION

.github/workflows/create_release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
fail-fast: false
2525
matrix:
2626
python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
27-
cuda: ["12.6", "12.8", "12.9"]
27+
cuda: ["12.6", "12.8", "13.0"]
2828
torch: ["2.8.0"]
2929
runs-on: [self-hosted, linux]
3030
env:

.github/workflows/publish_devel_image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
strategy:
2222
fail-fast: false
2323
matrix:
24-
cuda: ["12.6", "12.8", "12.9", "13.0"]
24+
cuda: ["12.6", "12.8", "13.0"]
2525
gcc: ["12"]
2626
runs-on: [self-hosted, linux]
2727
steps:

.github/workflows/publish_manylinux_2_28_image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
strategy:
2222
fail-fast: false
2323
matrix:
24-
cuda: ["12.6", "12.8", "12.9"]
24+
cuda: ["12.6", "12.8", "13.0"]
2525
runs-on: [self-hosted, linux]
2626
steps:
2727
- name: Checkout repository

CMakeLists.txt

Lines changed: 54 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ option(USE_MANYLINUX "Build for manylinux" OFF)
77
option(BUILD_NVBENCH "Build the nvbench binary" OFF)
88
option(INSTALL_PY_MODULE "Install python module to scalellm directory" OFF)
99

10+
option(BUILD_KERNEL_ONLY "Build only the CUDA kernel library" OFF)
11+
1012
set(CMAKE_CXX_STANDARD 17)
1113
set(CMAKE_CXX_STANDARD_REQUIRED ON)
1214
set(CMAKE_CXX_EXTENSIONS OFF)
@@ -92,6 +94,9 @@ message(STATUS "TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}")
9294

9395
# configure vcpkg
9496
# have to set CMAKE_TOOLCHAIN_FILE before first project call.
97+
if (NOT BUILD_KERNEL_ONLY)
98+
set(VCPKG_MANIFEST_FEATURES service)
99+
endif()
95100
if (DEFINED ENV{VCPKG_ROOT} AND NOT DEFINED CMAKE_TOOLCHAIN_FILE)
96101
set(CMAKE_TOOLCHAIN_FILE "$ENV{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
97102
CACHE STRING "Vcpkg toolchain file")
@@ -121,64 +126,71 @@ project(
121126
LANGUAGES C CXX CUDA
122127
)
123128

124-
find_package(CUDAToolkit REQUIRED)
125-
126129
# setup CMake module path, defines path for include() and find_package()
127130
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
128-
enable_language(Rust)
129-
find_package(Rust REQUIRED)
130-
131-
# include custom cmake modules
132-
include(static_analyzers)
133-
# TODO: can't use sanitizers with CUDA for now.
134-
# include(sanitizers)
135-
136-
if(UNIX)
137-
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
138-
endif()
131+
# include current and third_party paths
132+
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src)
133+
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
139134

140-
find_package(Boost REQUIRED)
141-
find_package(Threads REQUIRED)
142-
# find all dependencies from vcpkg
143-
find_package(fmt CONFIG REQUIRED GLOBAL)
135+
# find required common packages
136+
find_package(CUDAToolkit REQUIRED)
144137
find_package(glog CONFIG REQUIRED)
145-
find_package(gflags CONFIG REQUIRED)
146138
find_package(absl CONFIG REQUIRED)
147-
find_package(Protobuf CONFIG REQUIRED)
148-
find_package(gRPC CONFIG REQUIRED)
149-
find_package(re2 CONFIG REQUIRED)
150-
find_package(folly CONFIG REQUIRED)
151139
find_package(GTest CONFIG REQUIRED)
152-
find_package(benchmark CONFIG REQUIRED)
153-
find_package(nlohmann_json CONFIG REQUIRED)
154-
find_package(prometheus-cpp CONFIG REQUIRED)
155-
find_package(RapidJSON CONFIG REQUIRED)
140+
141+
# find packages for service build
142+
if (NOT BUILD_KERNEL_ONLY)
143+
enable_language(Rust)
144+
find_package(Rust REQUIRED)
145+
146+
# include custom cmake modules
147+
include(static_analyzers)
148+
# TODO: can't use sanitizers with CUDA for now.
149+
# include(sanitizers)
150+
151+
if(UNIX)
152+
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
153+
endif()
154+
155+
find_package(Boost REQUIRED)
156+
find_package(Threads REQUIRED)
157+
# find all dependencies from vcpkg
158+
find_package(fmt CONFIG REQUIRED GLOBAL)
159+
find_package(gflags CONFIG REQUIRED)
160+
find_package(Protobuf CONFIG REQUIRED)
161+
find_package(gRPC CONFIG REQUIRED)
162+
find_package(re2 CONFIG REQUIRED)
163+
find_package(folly CONFIG REQUIRED)
164+
find_package(benchmark CONFIG REQUIRED)
165+
find_package(nlohmann_json CONFIG REQUIRED)
166+
find_package(prometheus-cpp CONFIG REQUIRED)
167+
find_package(RapidJSON CONFIG REQUIRED)
168+
169+
find_package(NCCL REQUIRED)
170+
171+
find_package(Jemalloc)
172+
if(Jemalloc_FOUND)
173+
link_libraries(Jemalloc::jemalloc)
174+
endif()
175+
endif()
156176

157177
if (USE_MANYLINUX)
158178
# manylinux doesn't ship Development.Embed
159179
find_package(Python REQUIRED COMPONENTS Interpreter Development.Module)
160180
else()
161181
find_package(Python REQUIRED COMPONENTS Interpreter Development)
162182
endif()
163-
164-
find_package(NCCL REQUIRED)
165-
166-
find_package(Jemalloc)
167-
if(Jemalloc_FOUND)
168-
link_libraries(Jemalloc::jemalloc)
169-
endif()
170-
171183
# Important Note: Always invoke find_package for other dependencies
172184
# before including libtorch, as doing so afterwards may lead to
173185
# unexpected linker errors.
174186
if (DEFINED ENV{LIBTORCH_ROOT})
175187
find_package(Torch REQUIRED HINTS "$ENV{LIBTORCH_ROOT}")
176188
message(STATUS "Using libtorch at $ENV{LIBTORCH_ROOT}")
177189
else()
178-
SET(TORCH_VERSION "2.8.0")
190+
SET(TORCH_VERSION "2.9.0")
179191
include(FetchContent)
180-
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.9)
181-
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu129/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcu129.zip")
192+
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 13.0)
193+
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu130/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcu130.zip")
182194
elseif (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.8)
183195
set(LIBTORCH_URL "https://download.pytorch.org/libtorch/cu128/libtorch-shared-with-deps-${TORCH_VERSION}%2Bcu128.zip")
184196
elseif(CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12.6)
@@ -235,12 +247,10 @@ message(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
235247
include(CTest)
236248
include(GoogleTest)
237249

238-
# include current path
239-
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/src)
240-
list(APPEND COMMON_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
241-
242250
# add subdirectories
243-
add_subdirectory(proto)
244-
add_subdirectory(src)
245251
add_subdirectory(third_party)
246-
add_subdirectory(scalellm)
252+
add_subdirectory(src)
253+
if (NOT BUILD_KERNEL_ONLY)
254+
add_subdirectory(proto)
255+
add_subdirectory(scalellm)
256+
endif()

src/CMakeLists.txt

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,22 @@
1-
add_subdirectory(chat_template)
2-
add_subdirectory(common)
3-
add_subdirectory(handlers)
1+
add_subdirectory(gtest_main)
42
add_subdirectory(kernels)
5-
add_subdirectory(tokenizer)
6-
add_subdirectory(layers)
7-
add_subdirectory(models)
8-
add_subdirectory(model_loader)
9-
add_subdirectory(model_parallel)
10-
add_subdirectory(sampling)
11-
add_subdirectory(request)
12-
add_subdirectory(memory)
13-
add_subdirectory(scheduler)
14-
add_subdirectory(speculative)
15-
add_subdirectory(engine)
16-
add_subdirectory(server)
17-
add_subdirectory(benchmark)
18-
add_subdirectory(huggingface)
3+
4+
if (NOT BUILD_KERNEL_ONLY)
5+
add_subdirectory(chat_template)
6+
add_subdirectory(common)
7+
add_subdirectory(handlers)
8+
add_subdirectory(tokenizer)
9+
add_subdirectory(layers)
10+
add_subdirectory(models)
11+
add_subdirectory(model_loader)
12+
add_subdirectory(model_parallel)
13+
add_subdirectory(sampling)
14+
add_subdirectory(request)
15+
add_subdirectory(memory)
16+
add_subdirectory(scheduler)
17+
add_subdirectory(speculative)
18+
add_subdirectory(engine)
19+
add_subdirectory(server)
20+
add_subdirectory(benchmark)
21+
add_subdirectory(huggingface)
22+
endif()

src/common/CMakeLists.txt

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ include(cc_library)
22
include(cc_test)
33

44
cc_library(
5-
NAME
5+
NAME
66
common
77
HDRS
88
macros.h
@@ -28,21 +28,8 @@ cc_library(
2828
glog::glog
2929
)
3030

31-
cc_library(
32-
TESTONLY
33-
NAME
34-
gtest_main
35-
SRCS
36-
gtest_main.cpp
37-
DEPS
38-
GTest::gtest
39-
CUDA::toolkit
40-
LINKOPTS
41-
cudart
42-
)
43-
4431
cc_test(
45-
NAME
32+
NAME
4633
common_test
4734
SRCS
4835
range_test.cpp
@@ -55,4 +42,3 @@ cc_test(
5542
:gtest_main
5643
torch
5744
)
58-

src/gtest_main/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
include(cc_library)
2+
3+
cc_library(
4+
TESTONLY
5+
NAME
6+
gtest_main
7+
SRCS
8+
gtest_main.cpp
9+
DEPS
10+
GTest::gtest
11+
CUDA::toolkit
12+
LINKOPTS
13+
cudart
14+
)

src/huggingface/src/lib.rs

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
// A simple C wrapper of safetensors and tokenizers library
22
// adapted from https://github.com/huggingface/safetensors/tree/c_bindings
33

4+
// #[repr(C)]: use C language’s data layout
5+
// extern "C": tells Rust to use C ABI conventions
6+
// #[no_mangle]: prevent Rust from renaming the function
7+
48
// Import the needed libraries
59
use core::ffi::c_uint;
610
use core::str::Utf8Error;
@@ -415,7 +419,6 @@ unsafe fn _get_tensor(
415419
Ok(())
416420
}
417421

418-
419422
// A simple C wrapper of hf-tokenizer library
420423
// ported from https://github.com/mlc-ai/tokenizers-cpp
421424

@@ -433,12 +436,8 @@ pub struct TokenizerWrapper {
433436
impl TokenizerWrapper {
434437
pub fn encode(&mut self, text: &str, add_special_tokens: bool) {
435438
// Encode the text and store the ids
436-
self.encode_ids = Vec::from(
437-
self.tokenizer
438-
.encode(text, add_special_tokens)
439-
.unwrap()
440-
.get_ids(),
441-
);
439+
let encoded = self.tokenizer.encode(text, add_special_tokens).unwrap();
440+
self.encode_ids = encoded.get_ids().to_vec();
442441
}
443442

444443
pub fn decode(&mut self, ids: Vec<u32>, skip_special_tokens: bool) {
@@ -453,11 +452,8 @@ impl TokenizerWrapper {
453452

454453
#[no_mangle]
455454
extern "C" fn tokenizer_from_file(path: *const c_char) -> *mut TokenizerWrapper {
456-
let c_str = unsafe { CStr::from_ptr(path) };
457-
let path_str = match c_str.to_str() {
458-
Ok(s) => s,
459-
Err(_) => panic!("Failed to convert C string to Rust string"),
460-
};
455+
let c_str = unsafe { CStr::from_ptr(path) }; // borrowed C string
456+
let path_str = c_str.to_str().unwrap(); // convert the borrowed CStr to &str
461457

462458
let boxed = Box::new(TokenizerWrapper {
463459
tokenizer: Tokenizer::from_file(path_str).unwrap().into(),
@@ -466,6 +462,7 @@ extern "C" fn tokenizer_from_file(path: *const c_char) -> *mut TokenizerWrapper
466462
id_to_token_result: String::new(),
467463
});
468464

465+
// Convert into a raw pointer: *mut TokenizerWrapper
469466
Box::into_raw(boxed)
470467
}
471468

@@ -527,11 +524,7 @@ extern "C" fn tokenizer_free(wrapper: *mut TokenizerWrapper) {
527524
}
528525

529526
#[no_mangle]
530-
extern "C" fn tokenizer_token_to_id(
531-
handle: *mut TokenizerWrapper,
532-
token: *const u8,
533-
len: usize
534-
) {
527+
extern "C" fn tokenizer_token_to_id(handle: *mut TokenizerWrapper, token: *const u8, len: usize) {
535528
unsafe {
536529
let token: &str = std::str::from_utf8(std::slice::from_raw_parts(token, len)).unwrap();
537530
let id = (*handle).tokenizer.token_to_id(token);
@@ -564,8 +557,7 @@ extern "C" fn tokenizer_id_to_token(
564557
#[no_mangle]
565558
extern "C" fn tokenizer_get_vocab_size(
566559
handle: *mut TokenizerWrapper,
567-
with_added_tokens: bool) -> usize {
568-
unsafe {
569-
(*handle).get_vocab_size(with_added_tokens)
570-
}
560+
with_added_tokens: bool,
561+
) -> usize {
562+
unsafe { (*handle).get_vocab_size(with_added_tokens) }
571563
}

0 commit comments

Comments
 (0)