
Commit 70f26c2

Merge branch 'main' into feat/dynamic_link
2 parents: d441204 + 38686de

File tree

10 files changed: +114 −44 lines

.github/workflows/llama-cpp-rs-check.yml
Cargo.lock
Cargo.toml
embeddings/Cargo.toml
embeddings/src/main.rs
llama-cpp-2/Cargo.toml
llama-cpp-sys-2/Cargo.toml
llama-cpp-sys-2/build.rs
simple/Cargo.toml
simple/src/main.rs

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 2 additions & 2 deletions
@@ -45,11 +45,11 @@ jobs:
       - name: checkout
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
       - name: Setup QEMU
-        uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3
+        uses: docker/setup-qemu-action@5927c834f5b4fdf503fca6f4c7eccda82949e1ee
         with:
           platforms: arm64,amd64
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb
+        uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4
       - name: Build
         uses: docker/build-push-action@v6
         with:
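
Both actions stay pinned to full commit SHAs rather than mutable tags, so each bump replaces one audited revision of the action with another instead of tracking a moving target.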

Cargo.lock

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -16,9 +16,9 @@ hf-hub = { version = "0.3.2" }
 criterion = "0.5.1"
 pprof = "0.13.0"
 bindgen = "0.69.4"
-cc = "1.0.100"
+cc = "1.0.105"
 anyhow = "1.0.86"
-clap = "4.5.4"
+clap = "4.5.8"
 encoding_rs = "0.8.34"

 [workspace.lints.rust]

embeddings/Cargo.toml

Lines changed: 8 additions & 4 deletions
@@ -1,15 +1,19 @@
 [package]
 name = "embeddings"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"

-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
-llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.60" }
+llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
 hf-hub = { workspace = true }
 clap = { workspace = true , features = ["derive"] }
 anyhow = { workspace = true }

+[features]
+cuda = ["llama-cpp-2/cuda"]
+metal = ["llama-cpp-2/metal"]
+native = ["llama-cpp-2/native"]
+vulkan = ["llama-cpp-2/vulkan"]
+
 [lints]
 workspace = true
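
The new flags are plain pass-throughs to the corresponding `llama-cpp-2` features, so the example binary can now be built against a GPU backend directly, e.g. `cargo build -p embeddings --features vulkan`; any of `cuda`, `metal`, `native`, or `vulkan` works the same way.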

embeddings/src/main.rs

Lines changed: 4 additions & 4 deletions
@@ -35,7 +35,7 @@ struct Args {
     #[clap(short)]
     normalise: bool,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "vulkan"))]
     #[clap(long)]
     disable_gpu: bool,
 }
@@ -78,7 +78,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         normalise,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         disable_gpu,
     } = Args::parse();

@@ -87,13 +87,13 @@

     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
         LlamaModelParams::default()
     };
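
For readers unfamiliar with this gating style, here is a minimal self-contained sketch of the `cfg(any(...))` pattern the commit switches to (a hypothetical crate declaring `cuda` and `vulkan` features, not the example's real code): the field and the GPU branch exist only when at least one backend feature is enabled, and everything still compiles without them.

// Sketch only: assumes the crate declares `cuda` and `vulkan` features.
struct Args {
    // The field only exists when at least one GPU backend is compiled in.
    #[cfg(any(feature = "cuda", feature = "vulkan"))]
    disable_gpu: bool,
}

#[allow(unused_variables)] // `args` is unused in a CPU-only build
fn n_gpu_layers(args: &Args) -> u32 {
    // This whole statement is compiled out unless a GPU feature is enabled.
    #[cfg(any(feature = "cuda", feature = "vulkan"))]
    if !args.disable_gpu {
        return 1000; // offload every layer, as the examples do
    }
    0 // CPU-only (or --disable-gpu) fallback
}

fn main() {
    let args = Args {
        #[cfg(any(feature = "cuda", feature = "vulkan"))]
        disable_gpu: false,
    };
    println!("n_gpu_layers = {}", n_gpu_layers(&args));
}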

llama-cpp-2/Cargo.toml

Lines changed: 19 additions & 2 deletions
@@ -1,7 +1,7 @@
 [package]
 name = "llama-cpp-2"
 description = "llama.cpp bindings for Rust"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/utilityai/llama-cpp-rs"
@@ -10,16 +10,33 @@ repository = "https://github.com/utilityai/llama-cpp-rs"

 [dependencies]
 enumflags2 = "0.7.10"
-llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.60" }
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.61" }
 thiserror = { workspace = true }
 tracing = { workspace = true }

 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic_link = ["llama-cpp-sys-2/dynamic_link"]
+vulkan = ["llama-cpp-sys-2/vulkan"]
+native = ["llama-cpp-sys-2/native"]
 sampler = []

+[target.'cfg(target_feature = "avx")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx"] }
+[target.'cfg(target_feature = "avx2")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx2"] }
+[target.'cfg(target_feature = "avx512f")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512"] }
+[target.'cfg(target_feature = "avx512vbmi")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512_vmbi"] }
+[target.'cfg(target_feature = "avx512vnni")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512_vnni"] }
+[target.'cfg(target_feature = "f16c")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["f16c"] }
+[target.'cfg(target_feature = "fma")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["fma"] }
+
 [target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
 llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }
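
A note on the new `cfg(target_feature = ...)` tables: these only match when the corresponding CPU features are enabled at compile time (e.g. `RUSTFLAGS="-C target-cpu=native"` or `-C target-feature=+avx2`), and depending on the Cargo version, flags passed via RUSTFLAGS may not be reflected when Cargo evaluates target tables, so they are best treated as best-effort. A throwaway snippet (not part of the crate) to inspect what a given set of flags enables:

// Prints the compile-time CPU-feature cfgs these Cargo tables key off.
// Try: RUSTFLAGS="-C target-feature=+avx2,+fma" cargo run
fn main() {
    println!("avx     = {}", cfg!(target_feature = "avx"));
    println!("avx2    = {}", cfg!(target_feature = "avx2"));
    println!("avx512f = {}", cfg!(target_feature = "avx512f"));
    println!("f16c    = {}", cfg!(target_feature = "f16c"));
    println!("fma     = {}", cfg!(target_feature = "fma"));
}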

llama-cpp-sys-2/Cargo.toml

Lines changed: 10 additions & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 name = "llama-cpp-sys-2"
 description = "Low Level Bindings to llama.cpp"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/utilityai/llama-cpp-rs"
@@ -53,6 +53,15 @@ cc = { workspace = true, features = ["parallel"] }
 once_cell = "1.19.0"

 [features]
+avx = []
+avx2 = []
+avx512 = []
+avx512_vmbi = []
+avx512_vnni = []
 cuda = []
+f16c = []
+fma = []
 metal = []
 dynamic_link = []
+vulkan = []
+native = []
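
These sys-level entries are empty marker features: they pull in no dependencies and exist, presumably, so the build script (see `push_feature_flags` in build.rs below) can map them onto ggml compile flags. A minimal sketch of how a build.rs can react to such markers (illustrative only; Cargo exports every enabled feature to build scripts as an environment variable):

// build.rs sketch: Cargo sets CARGO_FEATURE_<NAME> (uppercased, `-` -> `_`)
// for each enabled feature of the package being built.
fn main() {
    for feat in ["AVX2", "F16C", "FMA", "VULKAN"] {
        if std::env::var_os(format!("CARGO_FEATURE_{feat}")).is_some() {
            println!("cargo:warning=feature {feat} is enabled");
        }
    }
}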

llama-cpp-sys-2/build.rs

Lines changed: 47 additions & 9 deletions
@@ -507,7 +507,7 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
     let common = LLAMA_PATH.join("ggml-common.h");

     let input_file = File::open(ggml_metal_shader_path).expect("Failed to open input file");
-    let mut output_file =
+    let output_file =
         File::create(&ggml_metal_shader_out_path).expect("Failed to create output file");

     let output = Command::new("sed")
@@ -583,11 +583,35 @@
         .file(LLAMA_PATH.join("ggml-metal.m"));
 }

+fn find_windows_vulkan_sdk() -> PathBuf {
+    // if the vulkan sdk is installed in the standard location then this should be pretty fast,
+    // but we still must search recursively because we don't know the exact version number.
+    // if it's installed somewhere else, this will take a while, but it's better than failing.
+    let vulkan_root = Command::new("powershell.exe")
+        .arg("-Command")
+        .arg(r#"
+            if (test-path -pathtype Container "/VulkanSDK") {
+                $root = "/VulkanSDK"
+            } else {
+                $root = "/"
+            }
+            get-childitem -path $root -recurse -filter "vulkan.h" 2>$null | foreach-object { $_.directory.parent.parent.fullname }
+        "#)
+        .output()
+        .expect("could not find vulkan.h")
+        .stdout;
+    let vulkan_root = String::from_utf8_lossy(
+        vulkan_root
+            .split(|c| *c == b'\n')
+            .next()
+            .expect("could not find vulkan.h"),
+    );
+    PathBuf::from(vulkan_root.trim())
+}
+
 fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {
     println!("Compiling Vulkan GGML..");

-    // Vulkan gets linked through the ash crate.
-
     if cfg!(debug_assertions) {
         cx.define("GGML_VULKAN_DEBUG", None)
             .define("GGML_VULKAN_CHECK_RESULTS", None)
@@ -602,12 +626,25 @@ fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {

     let lib_name = "ggml-vulkan";

-    cxx.clone()
-        .include("./thirdparty/Vulkan-Headers/include/")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
-        .compile(lib_name);
-
+    if cfg!(target_os = "windows") {
+        let vulkan_root = find_windows_vulkan_sdk();
+        cxx.clone()
+            .include(vulkan_root.join("Include"))
+            .include(LLAMA_PATH.as_path())
+            .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
+            .compile(lib_name);
+        println!(
+            "cargo:rustc-link-search=native={}",
+            vulkan_root.join("Lib").display()
+        );
+        println!("cargo:rustc-link-lib=vulkan-1");
+    } else {
+        cxx.clone()
+            .include(LLAMA_PATH.as_path())
+            .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
+            .compile(lib_name);
+        println!("cargo:rustc-link-lib=vulkan");
+    }
     lib_name
 }

@@ -673,6 +710,7 @@ fn main() {
     push_warn_flags(&mut cx, &mut cxx);
     push_feature_flags(&mut cx, &mut cxx);

+    #[allow(unused_variables)]
     let feat_lib = if cfg!(feature = "vulkan") {
         Some(compile_vulkan(&mut cx, &mut cxx))
     } else if cfg!(feature = "cuda") {
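
The platform split in `compile_vulkan` comes down to two Cargo link directives: the Windows Vulkan SDK ships its import library as `vulkan-1.lib`, while Unix-like systems link the loader as `libvulkan`. A stripped-down sketch of just that logic (the SDK path is a hypothetical stand-in for whatever `find_windows_vulkan_sdk` discovers at build time):

// build.rs sketch of the link directives emitted above.
fn main() {
    if cfg!(target_os = "windows") {
        // Hypothetical SDK root; the real one is located at build time.
        println!("cargo:rustc-link-search=native=C:/VulkanSDK/1.3.283.0/Lib");
        println!("cargo:rustc-link-lib=vulkan-1"); // links vulkan-1.lib
    } else {
        println!("cargo:rustc-link-lib=vulkan"); // links libvulkan.so
    }
}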

simple/Cargo.toml

Lines changed: 4 additions & 2 deletions
@@ -1,12 +1,12 @@
 [package]
 name = "simple"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.60" }
+llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
 hf-hub = { workspace = true }
 clap = { workspace = true , features = ["derive"] }
 anyhow = { workspace = true }
@@ -15,6 +15,8 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
+native = ["llama-cpp-2/native"]
+vulkan = ["llama-cpp-2/vulkan"]

 [lints]
 workspace = true

simple/src/main.rs

Lines changed: 4 additions & 4 deletions
@@ -44,7 +44,7 @@ struct Args {
     #[arg(short = 'o', value_parser = parse_key_val)]
     key_value_overrides: Vec<(String, ParamOverrideValue)>,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "vulkan"))]
     #[clap(long)]
     disable_gpu: bool,
     #[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -124,7 +124,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         file,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         disable_gpu,
         key_value_overrides,
         seed,
@@ -138,13 +138,13 @@

     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
        }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
         LlamaModelParams::default()
     };
