3 changes: 3 additions & 0 deletions rllm/llama-cpp-low/Cargo.toml
@@ -16,3 +16,6 @@ cmake = "0.1.50"
[features]
default = []
cuda = []
sycl = []
sycl_fp16 = []
sycl_nvidia = []
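Worth noting for reviewers: Cargo exposes each enabled feature to the crate's build script as a `CARGO_FEATURE_<NAME>` environment variable, which is what the build.rs changes below test for. A minimal sketch of the mechanism (the `-p llama-cpp-low` package name is assumed from the directory; adjust to the actual package name):

```sh
# Build the low-level wrapper with SYCL + fp16 enabled (package name assumed
# to match the directory name).
cargo build -p llama-cpp-low --features sycl,sycl_fp16
# For this build, Cargo runs build.rs with CARGO_FEATURE_SYCL=1 and
# CARGO_FEATURE_SYCL_FP16=1 in its environment, which the std::env::var()
# checks below pick up.
```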
59 changes: 58 additions & 1 deletion rllm/llama-cpp-low/build.rs
@@ -6,6 +6,9 @@ const SUBMODULE_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/llama.cpp");
fn main() {
    let ccache = true;
    let cuda = std::env::var("CARGO_FEATURE_CUDA").unwrap_or(String::new());
    let sycl = std::env::var("CARGO_FEATURE_SYCL").unwrap_or(String::new());
    let sycl_fp16 = std::env::var("CARGO_FEATURE_SYCL_FP16").unwrap_or(String::new());
    let sycl_nvidia = std::env::var("CARGO_FEATURE_SYCL_NVIDIA").unwrap_or(String::new());

    let submodule_dir = &PathBuf::from(SUBMODULE_DIR);
    let header_path = submodule_dir.join("llama.h");
@@ -29,15 +32,69 @@ fn main() {
            .configure_arg("-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache");
    }

    if cuda == "1" && sycl == "1" {
        panic!("Only one of the cuda and sycl features can be enabled at a time!");
    }
    if cuda == "1" {
        cmake.configure_arg("-DLLAMA_CUBLAS=ON");
        println!("cargo:rustc-link-search=/usr/local/cuda/lib64");
        println!("cargo:rustc-link-lib=cuda");
        println!("cargo:rustc-link-lib=cudart");
        println!("cargo:rustc-link-lib=cublas");
        println!("cargo:rustc-link-lib=cupti");
    } else if sycl == "1" {
        cmake.configure_arg("-DLLAMA_SYCL=ON");
        cmake.configure_arg("-DCMAKE_C_COMPILER=icx");
        cmake.configure_arg("-DCMAKE_CXX_COMPILER=icpx");

        let dirs = [
            "/opt/intel/oneapi/compiler/latest/lib",
            "/opt/intel/oneapi/mkl/latest/lib",
            //"/opt/intel/oneapi/dnnl/latest/lib",
        ];

        // *.a => static
        // *.so => dynamic
        for dir in dirs.iter() {
            println!("cargo:rustc-link-search={}", dir);
            for file in std::fs::read_dir(dir).unwrap() {
                let file = file.unwrap();
                let file_name = file.file_name();
                let file_name = file_name.to_str().unwrap();
                if !file_name.starts_with("lib") { continue; }
                if file_name.contains("lp64") && !file_name.contains("ilp64") { continue; }
                if file_name.contains("seq") { continue; }
                if file_name == "libmkl_gnu_thread.so" { continue; }
                let file_name = file_name.trim_start_matches("lib");

                if file_name.ends_with(".so") {
                    let file_name = &file_name[..file_name.len()-3];
                    println!("cargo:rustc-link-lib=dylib={}", file_name);
                } else if file_name.ends_with(".a") {
                    let file_name = &file_name[..file_name.len()-2];
                    println!("cargo:rustc-link-lib=static={}", file_name);
                }
            }
        }
        //panic!("stop here");

        //println!("cargo:rustc-link-search=native=/opt/intel/oneapi/compiler/latest/lib");
        //println!("cargo:rustc-link-lib=intlc");
        //println!("cargo:rustc-link-lib=svml");
        //println!("cargo:rustc-link-lib=sycl");
        //println!("cargo:rustc-link-search=native=/opt/intel/oneapi/mkl/latest/lib");
        //println!("cargo:rustc-link-lib=mkl_core");
        //println!("cargo:rustc-link-lib=mkl_sycl_blas");
        //println!("cargo:rustc-link-lib=mkl_sycl");
    }
cmake.very_verbose(true);

let dst = cmake.build();

println!("cargo:rustc-link-search=native={}/lib", dst.display());
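The `/opt/intel/oneapi/...` search paths above assume a default oneAPI installation; `icx`/`icpx` and the MKL/SYCL libraries must be visible when cmake runs. A hedged sketch of preparing such an environment before building (setvars.sh is the standard oneAPI environment script; exact paths and the package name may differ):

```sh
# Assumes a default oneAPI layout under /opt/intel/oneapi.
source /opt/intel/oneapi/setvars.sh
# build.rs scans these directories and emits a rustc-link-lib line for every
# library that passes its name filter, so they must exist at build time:
ls /opt/intel/oneapi/compiler/latest/lib /opt/intel/oneapi/mkl/latest/lib
cargo build -p llama-cpp-low --features sycl,sycl_nvidia   # SYCL build targeting NVIDIA
```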
44 changes: 40 additions & 4 deletions rllm/rllm-cuda/server.sh
@@ -41,6 +41,38 @@ while [ "$1" != "" ] ; do
            exit 1
        fi
        ;;
    --sycl )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl"
        else
            echo "--sycl only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-fp16 )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_fp16"
        else
            echo "--sycl-fp16 only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-nvidia )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_nvidia"
        else
            echo "--sycl-nvidia only valid for llama.cpp"
            exit 1
        fi
        ;;
    --sycl-nvidia-fp16 )
        if [ "$CPP" = 1 ] ; then
            VER="$VER --features sycl,sycl_nvidia,sycl_fp16"
        else
            echo "--sycl-nvidia-fp16 only valid for llama.cpp"
            exit 1
        fi
        ;;
    --trace )
        R_LOG=info,tokenizers=error,rllm=trace,aicirt=info,llama_cpp_low=trace
        ;;
@@ -84,7 +116,7 @@ if [ "$CPP" = 1 ] ; then
    * )
        SELF="server.sh"
        cat <<EOF
usage: $SELF [--loop] [--cuda] [--sycl] [--sycl-fp16] [--sycl-nvidia] [--sycl-nvidia-fp16] [--debug] [model_name] [rllm_args...]

model_name can be a HuggingFace URL pointing to a .gguf file, or one of the following:

@@ -96,9 +128,13 @@ model_name can a HuggingFace URL pointing to a .gguf file, or one of the followi

Additionally, "$SELF build" will just build the server, and not run a model.

  --cuda               try to build llama.cpp against installed CUDA
  --sycl               try to build llama.cpp against SYCL with fp32 support (make sure the required SYCL environment variables are set)
  --sycl-fp16          try to build llama.cpp against SYCL with fp16 support
  --sycl-nvidia        try to build llama.cpp against SYCL with NVIDIA support
  --sycl-nvidia-fp16   try to build llama.cpp against SYCL with fp16 and NVIDIA support
  --loop               restart server when it crashes and store logs in ./logs
  --debug              don't build in --release mode

Try $SELF phi2 --help to see available rllm_args
EOF
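For reference, a sketch of how the new flags could be exercised once this lands (phi2 and --loop come from the existing help text; the oneAPI environment is assumed to be set up as the --sycl description asks):

```sh
./server.sh --sycl phi2               # SYCL (fp32) build of llama.cpp, then serve phi2
./server.sh --loop --sycl-fp16 phi2   # fp16 SYCL build, restarting the server on crashes
./server.sh --sycl-nvidia-fp16 phi2   # SYCL targeting NVIDIA GPUs with fp16 support
```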
3 changes: 3 additions & 0 deletions rllm/rllm-llamacpp/Cargo.toml
@@ -21,3 +21,6 @@ path = "src/rllm-llamacpp.rs"
[features]
default = []
cuda = ["llama_cpp_low/cuda"]
sycl = ["llama_cpp_low/sycl"]
sycl_fp16 = ["llama_cpp_low/sycl_fp16"]
sycl_nvidia = ["llama_cpp_low/sycl_nvidia"]
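These entries only forward to the corresponding llama_cpp_low features, so enabling them on the binary crate is enough to propagate the SYCL configuration down to the llama.cpp build; server.sh's new flags do exactly that via `--features`. A hypothetical direct invocation from the rllm-llamacpp crate directory (bypassing server.sh):

```sh
# Hypothetical: build the rllm-llamacpp binary with SYCL + fp16; the sycl and
# sycl_fp16 features forward to llama_cpp_low/sycl and llama_cpp_low/sycl_fp16.
cargo build --release --features sycl,sycl_fp16
```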