3 changes: 3 additions & 0 deletions rllm/llama-cpp-low/Cargo.toml
@@ -16,3 +16,6 @@ cmake = "0.1.50"
[features]
default = []
cuda = []
sycl = []
sycl_fp16 = []
sycl_nvidia = []
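Worth noting for reviewers: Cargo exposes each enabled feature to the crate's build script as a `CARGO_FEATURE_<NAME>` environment variable, which is what the build.rs changes below test for. A minimal sketch of the mechanism (the `-p llama-cpp-low` package name is assumed from the directory; adjust to the actual package name):

```sh
# Build the low-level wrapper with SYCL + fp16 enabled (package name assumed
# to match the directory name).
cargo build -p llama-cpp-low --features sycl,sycl_fp16
# For this build, Cargo runs build.rs with CARGO_FEATURE_SYCL=1 and
# CARGO_FEATURE_SYCL_FP16=1 in its environment, which the std::env::var()
# checks below pick up.
```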
59 changes: 58 additions & 1 deletion rllm/llama-cpp-low/build.rs
@@ -6,6 +6,9 @@ const SUBMODULE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/llama.cpp");
fn main() {
    let ccache = true;
    let cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new());
    let sycl = std::env::var("CARGO_FEATURE_SYCL").unwrap_or(String::new());
    let sycl_fp16 = std::env::var("CARGO_FEATURE_SYCL_FP16").unwrap_or(String::new());
    let sycl_nvidia = std::env::var("CARGO_FEATURE_SYCL_NVIDIA").unwrap_or(String::new());

    let submodule_dir = &PathBuf::from(SUBMODULE_DIR);
    let header_path = submodule_dir.join("llama.h");
@@ -29,15 +32,69 @@ fn main() {
            .configure_arg("-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache");
    }

    if cuda == "1" && sycl == "1" {
        panic!("Only one of the cuda and sycl features can be enabled at a time!");
    }
    if cuda == "1" {
        cmake.configure_arg("-DLLAMA_CUBLAS=ON");
        println!("cargo:rustc-link-search=/usr/local/cuda/lib64");
        println!("cargo:rustc-link-lib=cuda");
        println!("cargo:rustc-link-lib=cudart");
        println!("cargo:rustc-link-lib=cublas");
        println!("cargo:rustc-link-lib=cupti");
    } else if sycl == "1" {
        cmake.configure_arg("-DLLAMA_SYCL=ON");
        cmake.configure_arg("-DCMAKE_C_COMPILER=icx");
        cmake.configure_arg("-DCMAKE_CXX_COMPILER=icpx");

        let dirs = [
            "/opt/intel/oneapi/compiler/latest/lib",
            "/opt/intel/oneapi/mkl/latest/lib",
            //"/opt/intel/oneapi/dnnl/latest/lib",
        ];

        // *.a => static
        // *.so => dynamic
        for dir in dirs.iter() {
            println!("cargo:rustc-link-search={}", dir);
            for file in std::fs::read_dir(dir).unwrap() {
                let file = file.unwrap();
                let file_name = file.file_name();
                let file_name = file_name.to_str().unwrap();
                if !file_name.starts_with("lib") { continue; }
                if file_name.contains("lp64") && !file_name.contains("ilp64") { continue; }
                if file_name.contains("seq") { continue; }
                if file_name == "libmkl_gnu_thread.so" { continue; }
                let file_name = file_name.trim_start_matches("lib");

                if file_name.ends_with(".so") {
                    let file_name = &file_name[..file_name.len()-3];
                    println!("cargo:rustc-link-lib=dylib={}", file_name);
                } else if file_name.ends_with(".a") {
                    let file_name = &file_name[..file_name.len()-2];
                    println!("cargo:rustc-link-lib=static={}", file_name);
                }
            }
        }
        //panic!("stop here");

        //println!("cargo:rustc-link-search=native=/opt/intel/oneapi/compiler/latest/lib");
        //println!("cargo:rustc-link-lib=intlc");
        //println!("cargo:rustc-link-lib=svml");
        //println!("cargo:rustc-link-lib=sycl");
        //println!("cargo:rustc-link-search=native=/opt/intel/oneapi/mkl/latest/lib");
        //println!("cargo:rustc-link-lib=mkl_core");
        //println!("cargo:rustc-link-lib=mkl_sycl_blas");
        //println!("cargo:rustc-link-lib=mkl_sycl");
    }
cmake.very_verbose(true);

let dst = cmake.build();

println!("cargo:rustc-link-search=native={}/lib", dst.display());
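The `/opt/intel/oneapi/...` search paths above assume a default oneAPI installation; `icx`/`icpx` and the MKL/SYCL libraries must be visible when cmake runs. A hedged sketch of preparing such an environment before building (setvars.sh is the standard oneAPI environment script; exact paths and the package name may differ):

```sh
# Assumes a default oneAPI layout under /opt/intel/oneapi.
source /opt/intel/oneapi/setvars.sh
# build.rs scans these directories and emits a rustc-link-lib line for every
# library that passes its name filter, so they must exist at build time:
ls /opt/intel/oneapi/compiler/latest/lib /opt/intel/oneapi/mkl/latest/lib
cargo build -p llama-cpp-low --features sycl,sycl_nvidia   # SYCL build targeting NVIDIA
```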
44 changes: 40 additions & 4 deletions rllm/rllm-cuda/server.sh
@@ -41,6 +41,38 @@ while [ "$1" != "" ] ; do
            exit 1
        fi
        ;;
    --sycl )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl"
        else
            echo "--sycl only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-fp16 )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_fp16"
        else
            echo "--sycl-fp16 only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-nvidia )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_nvidia"
        else
            echo "--sycl-nvidia only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-nvidia-fp16 )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_nvidia,sycl_fp16"
        else
            echo "--sycl-nvidia-fp16 only valid for llama.cpp"
            exit 1
        fi
        ;;
    --trace )
        R_LOG=info,tokenizers=error,rllm=trace,aicirt=info,llama_cpp_low=trace
        ;;
@@ -84,7 +116,7 @@ if [ "$CPP" = 1 ] ; then
    * )
        SELF="server.sh"
        cat <<EOF
usage: $SELF [--loop] [--cuda] [--sycl] [--sycl-fp16] [--sycl-nvidia] [--sycl-nvidia-fp16] [--debug] [model_name] [rllm_args...]

model_name can be a HuggingFace URL pointing to a .gguf file, or one of the following:

@@ -96,9 +128,13 @@ model_name can a HuggingFace URL pointing to a .gguf file, or one of the followi

Additionally, "$SELF build" will just build the server, and not run a model.

  --cuda               try to build llama.cpp against installed CUDA
  --sycl               try to build llama.cpp against SYCL with fp32 support (make sure the required SYCL environment variables are set)
  --sycl-fp16          try to build llama.cpp against SYCL with fp16 support
  --sycl-nvidia        try to build llama.cpp against SYCL with NVIDIA support
  --sycl-nvidia-fp16   try to build llama.cpp against SYCL with fp16 and NVIDIA support
  --loop               restart server when it crashes and store logs in ./logs
  --debug              don't build in --release mode

Try $SELF phi2 --help to see available rllm_args
EOF
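For reference, a sketch of how the new flags could be exercised once this lands (phi2 and --loop come from the existing help text; the oneAPI environment is assumed to be set up as the --sycl description asks):

```sh
./server.sh --sycl phi2               # SYCL (fp32) build of llama.cpp, then serve phi2
./server.sh --loop --sycl-fp16 phi2   # fp16 SYCL build, restarting the server on crashes
./server.sh --sycl-nvidia-fp16 phi2   # SYCL targeting NVIDIA GPUs with fp16 support
```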
3 changes: 3 additions & 0 deletions rllm/rllm-llamacpp/Cargo.toml
@@ -21,3 +21,6 @@ path = "src/rllm-llamacpp.rs"
[features]
default = []
cuda = ["llama_cpp_low/cuda"]
sycl = ["llama_cpp_low/sycl"]
sycl_fp16 = ["llama_cpp_low/sycl_fp16"]
sycl_nvidia = ["llama_cpp_low/sycl_nvidia"]
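These entries only forward to the corresponding llama_cpp_low features, so enabling them on the binary crate is enough to propagate the SYCL configuration down to the llama.cpp build; server.sh's new flags do exactly that via `--features`. A hypothetical direct invocation from the rllm-llamacpp crate directory (bypassing server.sh):

```sh
# Hypothetical: build the rllm-llamacpp binary with SYCL + fp16; the sycl and
# sycl_fp16 features forward to llama_cpp_low/sycl and llama_cpp_low/sycl_fp16.
cargo build --release --features sycl,sycl_fp16
```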