Commit 45361ee

Merge branch 'main' into main

2 parents 7ada898 + fc04a8a
File tree: 10 files changed, +82 −35 lines

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 2 additions & 2 deletions
@@ -45,11 +45,11 @@ jobs:
       - name: checkout
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
       - name: Setup QEMU
-        uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3
+        uses: docker/setup-qemu-action@5927c834f5b4fdf503fca6f4c7eccda82949e1ee
         with:
           platforms: arm64,amd64
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb
+        uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4
       - name: Build
         uses: docker/build-push-action@v6
         with:

Cargo.lock

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -16,9 +16,9 @@ hf-hub = { version = "0.3.2" }
 criterion = "0.5.1"
 pprof = "0.13.0"
 bindgen = "0.69.4"
-cc = "1.0.100"
+cc = "1.0.105"
 anyhow = "1.0.86"
-clap = "4.5.4"
+clap = "4.5.8"
 encoding_rs = "0.8.34"

 [workspace.lints.rust]

embeddings/Cargo.toml

Lines changed: 6 additions & 2 deletions
@@ -3,13 +3,17 @@ name = "embeddings"
 version = "0.1.61"
 edition = "2021"

-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
 llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
 hf-hub = { workspace = true }
 clap = { workspace = true , features = ["derive"] }
 anyhow = { workspace = true }

+[features]
+cuda = ["llama-cpp-2/cuda"]
+metal = ["llama-cpp-2/metal"]
+native = ["llama-cpp-2/native"]
+vulkan = ["llama-cpp-2/vulkan"]
+
 [lints]
 workspace = true

embeddings/src/main.rs

Lines changed: 4 additions & 4 deletions
@@ -35,7 +35,7 @@ struct Args {
     #[clap(short)]
     normalise: bool,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "vulkan"))]
     #[clap(long)]
     disable_gpu: bool,
 }
@@ -78,7 +78,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         normalise,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         disable_gpu,
     } = Args::parse();

@@ -87,13 +87,13 @@ fn main() -> Result<()> {

     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
         LlamaModelParams::default()
     };

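Note: the cfg gate has to change in lock-step at three places in each example binary (the clap field, the destructuring in main(), and the model-parameter expression), because a field that is compiled out cannot be named anywhere else; the same change appears in simple/src/main.rs below. A minimal, self-contained sketch of that pattern, with clap's derive feature assumed and LlamaModelParams stubbed so the snippet compiles on its own:

// Sketch only: `LlamaModelParams` stands in for the real type from llama-cpp-2.
use clap::Parser;

#[derive(Default)]
struct LlamaModelParams {
    n_gpu_layers: u32,
}

impl LlamaModelParams {
    fn with_n_gpu_layers(mut self, n: u32) -> Self {
        self.n_gpu_layers = n;
        self
    }
}

#[derive(Parser)]
struct Args {
    /// Disable offloading layers to the gpu
    #[cfg(any(feature = "cuda", feature = "vulkan"))]
    #[clap(long)]
    disable_gpu: bool,
}

fn main() {
    // The field only exists when a GPU backend feature is enabled, so the
    // destructuring pattern must carry the same cfg predicate.
    let Args {
        #[cfg(any(feature = "cuda", feature = "vulkan"))]
        disable_gpu,
    } = Args::parse();

    // Exactly one of the two cfg'd expressions survives compilation, so the
    // block always has a single tail expression.
    let model_params = {
        #[cfg(any(feature = "cuda", feature = "vulkan"))]
        if !disable_gpu {
            LlamaModelParams::default().with_n_gpu_layers(1000)
        } else {
            LlamaModelParams::default()
        }
        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
        LlamaModelParams::default()
    };
    println!("offloading {} layers", model_params.n_gpu_layers);
}
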
llama-cpp-2/Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,8 @@ tracing = { workspace = true }
 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
+vulkan = ["llama-cpp-sys-2/vulkan"]
+native = ["llama-cpp-sys-2/native"]
 sampler = []

 [target.'cfg(target_feature = "avx")'.dependencies]

llama-cpp-sys-2/Cargo.toml

Lines changed: 2 additions & 1 deletion
@@ -62,4 +62,5 @@ cuda = []
 f16c = []
 fma = []
 metal = []
-
+vulkan = []
+native = []

llama-cpp-sys-2/build.rs

Lines changed: 48 additions & 10 deletions
@@ -463,7 +463,7 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
         .map(|f| f.unwrap())
         .filter(|entry| entry.file_name().to_string_lossy().ends_with(".cu"))
         .map(|entry| entry.path());
-
+
     let template_instances = read_dir(cuda_path.join("template-instances"))
         .unwrap()
         .map(|f| f.unwrap())
@@ -507,7 +507,7 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
     let common = LLAMA_PATH.join("ggml-common.h");

     let input_file = File::open(ggml_metal_shader_path).expect("Failed to open input file");
-    let mut output_file =
+    let output_file =
         File::create(&ggml_metal_shader_out_path).expect("Failed to create output file");

     let output = Command::new("sed")
@@ -583,11 +583,35 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
         .file(LLAMA_PATH.join("ggml-metal.m"));
 }

+fn find_windows_vulkan_sdk() -> PathBuf {
+    // if the vulkan sdk is installed in the standard location then this should be pretty fast,
+    // but we still must search recursively because we don't know the exact version number.
+    // if it's installed somewhere else, this will take a while, but it's better than failing.
+    let vulkan_root = Command::new("powershell.exe")
+        .arg("-Command")
+        .arg(r#"
+            if (test-path -pathtype Container "/VulkanSDK") {
+                $root = "/VulkanSDK"
+            } else {
+                $root = "/"
+            }
+            get-childitem -path $root -recurse -filter "vulkan.h" 2>$null | foreach-object { $_.directory.parent.parent.fullname }
+        "#)
+        .output()
+        .expect("could not find vulkan.h")
+        .stdout;
+    let vulkan_root = String::from_utf8_lossy(
+        vulkan_root
+            .split(|c| *c == b'\n')
+            .next()
+            .expect("could not find vulkan.h"),
+    );
+    PathBuf::from(vulkan_root.trim())
+}
+
 fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {
     println!("Compiling Vulkan GGML..");

-    // Vulkan gets linked through the ash crate.
-
     if cfg!(debug_assertions) {
         cx.define("GGML_VULKAN_DEBUG", None)
             .define("GGML_VULKAN_CHECK_RESULTS", None)
@@ -602,12 +626,25 @@ fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {

     let lib_name = "ggml-vulkan";

-    cxx.clone()
-        .include("./thirdparty/Vulkan-Headers/include/")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
-        .compile(lib_name);
-
+    if cfg!(target_os = "windows") {
+        let vulkan_root = find_windows_vulkan_sdk();
+        cxx.clone()
+            .include(vulkan_root.join("Include"))
+            .include(LLAMA_PATH.as_path())
+            .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
+            .compile(lib_name);
+        println!(
+            "cargo:rustc-link-search=native={}",
+            vulkan_root.join("Lib").display()
+        );
+        println!("cargo:rustc-link-lib=vulkan-1");
+    } else {
+        cxx.clone()
+            .include(LLAMA_PATH.as_path())
+            .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
+            .compile(lib_name);
+        println!("cargo:rustc-link-lib=vulkan");
+    }
     lib_name
 }

@@ -656,6 +693,7 @@ fn main() {
     push_warn_flags(&mut cx, &mut cxx);
     push_feature_flags(&mut cx, &mut cxx);

+    #[allow(unused_variables)]
     let feat_lib = if cfg!(feature = "vulkan") {
         Some(compile_vulkan(&mut cx, &mut cxx))
     } else if cfg!(feature = "cuda") {

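Note: compile_vulkan() no longer relies on the removed "Vulkan gets linked through the ash crate" path; it now emits link directives itself, split by platform. On Windows the headers and import library come from the discovered SDK root (Include/ and Lib/, linking vulkan-1), elsewhere the system loader is linked as vulkan. A self-contained sketch of just that link-flag logic, with the cc::Build compilation elided and a hypothetical SDK path standing in for find_windows_vulkan_sdk():

use std::path::PathBuf;

// Sketch only: emits the same cargo link directives as the new compile_vulkan()
// branches, without actually compiling ggml-vulkan.cpp.
fn emit_vulkan_link_flags(windows_sdk_root: Option<PathBuf>) {
    if cfg!(target_os = "windows") {
        // The Windows Vulkan SDK ships the loader import library as Lib\vulkan-1.lib.
        let root = windows_sdk_root.expect("Vulkan SDK not found");
        println!(
            "cargo:rustc-link-search=native={}",
            root.join("Lib").display()
        );
        println!("cargo:rustc-link-lib=vulkan-1");
    } else {
        // On other platforms the system loader is linked simply as `vulkan`.
        println!("cargo:rustc-link-lib=vulkan");
    }
}

fn main() {
    // Hypothetical SDK root; the real build script derives it from the location of
    // vulkan.h (Include\vulkan\vulkan.h), taking the grandparent of the header's folder.
    emit_vulkan_link_flags(Some(PathBuf::from(r"C:\VulkanSDK\1.3.283.0")));
}
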
simple/Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
+native = ["llama-cpp-2/native"]
+vulkan = ["llama-cpp-2/vulkan"]

 [lints]
 workspace = true

simple/src/main.rs

Lines changed: 4 additions & 4 deletions
@@ -44,7 +44,7 @@ struct Args {
     #[arg(short = 'o', value_parser = parse_key_val)]
     key_value_overrides: Vec<(String, ParamOverrideValue)>,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "vulkan"))]
     #[clap(long)]
     disable_gpu: bool,
     #[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -124,7 +124,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         file,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         disable_gpu,
         key_value_overrides,
         seed,
@@ -138,13 +138,13 @@ fn main() -> Result<()> {

     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
         LlamaModelParams::default()
     };
