Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion llama-cpp-2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@ tracing = { workspace = true }
encoding_rs = { workspace = true }

[features]
default = ["openmp"]
default = ["openmp", "android-shared-stdcxx"]
cuda = ["llama-cpp-sys-2/cuda"]
metal = ["llama-cpp-sys-2/metal"]
dynamic-link = ["llama-cpp-sys-2/dynamic-link"]
vulkan = ["llama-cpp-sys-2/vulkan"]
native = ["llama-cpp-sys-2/native"]
openmp = ["llama-cpp-sys-2/openmp"]
sampler = []
# Only has an impact on Android.
android-shared-stdcxx = ["llama-cpp-sys-2/shared-stdcxx"]


[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
Expand Down
6 changes: 3 additions & 3 deletions llama-cpp-2/src/model.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//! A safe wrapper around `llama_model`.
use std::ffi::CString;
use std::ffi::{c_char, CString};
use std::num::NonZeroU16;
use std::os::raw::c_int;
use std::path::Path;
Expand Down Expand Up @@ -565,7 +565,7 @@ impl LlamaModel {
chat.as_ptr(),
chat.len(),
add_ass,
buff.as_mut_ptr().cast::<i8>(),
buff.as_mut_ptr().cast::<c_char>(),
buff.len().try_into().expect("Buffer size exceeds i32::MAX"),
)
};
Expand All @@ -579,7 +579,7 @@ impl LlamaModel {
chat.as_ptr(),
chat.len(),
add_ass,
buff.as_mut_ptr().cast::<i8>(),
buff.as_mut_ptr().cast::<c_char>(),
buff.len().try_into().expect("Buffer size exceeds i32::MAX"),
)
};
Expand Down
12 changes: 2 additions & 10 deletions llama-cpp-2/src/sampling.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Safe wrapper around `llama_sampler`.

use std::borrow::Borrow;
use std::ffi::CString;
use std::ffi::{c_char, CString};
use std::fmt::{Debug, Formatter};

use crate::context::LlamaContext;
Expand All @@ -20,14 +20,6 @@ impl Debug for LlamaSampler {
}
}

// This is needed for the dry sampler to type-check on Android,
// because what is normally an `i8` is a `u8` there.
#[cfg(target_os = "android")]
type CChar = u8;

#[cfg(not(target_os = "android"))]
type CChar = i8;

impl LlamaSampler {
/// Sample and accept a token from the idx-th output of the last evaluation
#[must_use]
Expand Down Expand Up @@ -266,7 +258,7 @@ impl LlamaSampler {
.into_iter()
.map(|s| CString::new(s.as_ref()).expect("A sequence breaker contains null bytes"))
.collect();
let mut seq_breaker_pointers: Vec<*const CChar> =
let mut seq_breaker_pointers: Vec<*const c_char> =
seq_breakers.iter().map(|s| s.as_ptr()).collect();

let sampler = unsafe {
Expand Down
2 changes: 2 additions & 0 deletions llama-cpp-sys-2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,5 @@ dynamic-link = []
vulkan = []
native = []
openmp = []
# Only has an impact on Android.
shared-stdcxx = []
43 changes: 35 additions & 8 deletions llama-cpp-sys-2/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,22 +229,44 @@ fn main() {
config.static_crt(static_crt);
}

if target.contains("android") && target.contains("aarch64") {
if target.contains("android") {
// Build flags for Android are taken from this doc:
// https://github.com/ggerganov/llama.cpp/blob/master/docs/android.md
let android_ndk = env::var("ANDROID_NDK")
.expect("Please install Android NDK and ensure that ANDROID_NDK env variable is set");

println!("cargo::rerun-if-env-changed=ANDROID_NDK");

config.define(
"CMAKE_TOOLCHAIN_FILE",
format!("{android_ndk}/build/cmake/android.toolchain.cmake"),
);
config.define("ANDROID_ABI", "arm64-v8a");
config.define("ANDROID_PLATFORM", "android-28");
config.define("CMAKE_SYSTEM_PROCESSOR", "arm64");
config.define("CMAKE_C_FLAGS", "-march=armv8.7a");
config.define("CMAKE_CXX_FLAGS", "-march=armv8.7a");
config.define("GGML_OPENMP", "OFF");
if env::var("ANDROID_PLATFORM").is_ok() {
println!("cargo::rerun-if-env-changed=ANDROID_PLATFORM");
} else {
config.define("ANDROID_PLATFORM", "android-28");
}
if target.contains("aarch64") {
config.cflag("-march=armv8.7a");
config.cxxflag("-march=armv8.7a");
} else if target.contains("armv7") {
config.cflag("-march=armv8.7a");
config.cxxflag("-march=armv8.7a");
} else if target.contains("x86_64") {
config.cflag("-march=x86-64");
config.cxxflag("-march=x86-64");
} else if target.contains("i686") {
config.cflag("-march=i686");
config.cxxflag("-march=i686");
} else {
// Rather than guessing, just fail.
panic!("Unsupported Android target {target}");
}
config.define("GGML_LLAMAFILE", "OFF");
if cfg!(feature = "shared-stdcxx") {
println!("cargo:rustc-link-lib=dylib=stdc++");
println!("cargo:rustc-link-lib=c++_shared");
}
}

if cfg!(feature = "vulkan") {
Expand All @@ -266,8 +288,13 @@ fn main() {
config.define("GGML_CUDA", "ON");
}

if cfg!(feature = "openmp") {
// Android doesn't have OpenMP support AFAICT, and `openmp` is a default feature. Handle this here
// rather than modifying the defaults in Cargo.toml, in case someone enables the `openmp` feature
// and tries to build for Android anyway.
if cfg!(feature = "openmp") && !target.contains("android") {
config.define("GGML_OPENMP", "ON");
} else {
config.define("GGML_OPENMP", "OFF");
}

// General
Expand Down
Loading