Commit da3a933

Merge pull request #21 from ggml-org/master
Sync with upstream
2 parents ab621d2 + 1be76e4 commit da3a933

File tree

30 files changed: +1194 -1034 lines changed
.github/workflows/build-linux-cross.yml (new file, referenced from build.yml below)

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
+name: Build on Linux using cross-compiler
+on:
+  workflow_dispatch:
+  workflow_call:
+
+jobs:
+  ubuntu-latest-riscv64-cpu-cross:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Riscv
+        run: |
+          sudo dpkg --add-architecture riscv64
+          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
+              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
+          sudo apt-get clean
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+              build-essential \
+              gcc-14-riscv64-linux-gnu \
+              g++-14-riscv64-linux-gnu
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+              -DGGML_OPENMP=OFF \
+              -DLLAMA_BUILD_EXAMPLES=ON \
+              -DLLAMA_BUILD_TESTS=OFF \
+              -DCMAKE_SYSTEM_NAME=Linux \
+              -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
+              -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+              -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+              -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+              -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
+              -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+              -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+              -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-latest-riscv64-vulkan-cross:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Riscv
+        run: |
+          sudo dpkg --add-architecture riscv64
+          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
+              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
+          sudo apt-get clean
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+              build-essential \
+              glslc \
+              gcc-14-riscv64-linux-gnu \
+              g++-14-riscv64-linux-gnu \
+              libvulkan-dev:riscv64
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+              -DGGML_VULKAN=ON \
+              -DGGML_OPENMP=OFF \
+              -DLLAMA_BUILD_EXAMPLES=ON \
+              -DLLAMA_BUILD_TESTS=OFF \
+              -DCMAKE_SYSTEM_NAME=Linux \
+              -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
+              -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+              -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+              -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+              -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
+              -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+              -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+              -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-latest-arm64-vulkan-cross:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Arm64
+        run: |
+          sudo dpkg --add-architecture arm64
+          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
+              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
+          sudo apt-get clean
+          sudo apt-get update
+          sudo apt-get install -y --no-install-recommends \
+              build-essential \
+              glslc \
+              crossbuild-essential-arm64 \
+              libvulkan-dev:arm64
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+              -DGGML_VULKAN=ON \
+              -DGGML_OPENMP=OFF \
+              -DLLAMA_BUILD_EXAMPLES=ON \
+              -DLLAMA_BUILD_TESTS=OFF \
+              -DCMAKE_SYSTEM_NAME=Linux \
+              -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
+              -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
+              -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
+              -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+              -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
+              -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+              -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+              -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
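Note: these jobs only verify that the cross-builds compile. As a rough local smoke test (not part of this commit), the resulting riscv64 binaries can be run on an x86_64 host through user-mode emulation, assuming the qemu-user package is installed; the -L flag points QEMU at the riscv64 sysroot used during the build:

```sh
# Hedged sketch, not from the workflow: run a cross-compiled binary under QEMU.
qemu-riscv64 -L /usr/riscv64-linux-gnu ./build/bin/llama-cli --version
```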

.github/workflows/build.yml

Lines changed: 4 additions & 1 deletion
@@ -10,7 +10,7 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
+    paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
   pull_request:
     types: [opened, synchronize, reopened]
     paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
@@ -606,6 +606,9 @@ jobs:
           -DGGML_SYCL_F16=ON
         cmake --build build --config Release -j $(nproc)

+  build-linux-cross:
+    uses: ./.github/workflows/build-linux-cross.yml
+
   macOS-latest-cmake-ios:
     runs-on: macos-latest

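Note: because the new workflow declares both `workflow_call` and `workflow_dispatch` triggers, it runs as the reusable `build-linux-cross` job wired up above and can also be started by hand; a hedged example using the GitHub CLI, assuming it is installed and authenticated against the repository:

```sh
# Manually dispatch the cross-compile workflow on the default branch.
gh workflow run build-linux-cross.yml
```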
README.md

Lines changed: 29 additions & 0 deletions
@@ -530,6 +530,35 @@ If your issue is with model generation quality, then please at least scan the fo
 - [Aligning language models to follow instructions](https://openai.com/research/instruction-following)
 - [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155)

+## XCFramework
+The XCFramework is a precompiled version of the library for iOS, visionOS, tvOS,
+and macOS. It can be used in Swift projects without the need to compile the
+library from source. For example:
+```swift
+// swift-tools-version: 5.10
+// The swift-tools-version declares the minimum version of Swift required to build this package.
+
+import PackageDescription
+
+let package = Package(
+    name: "MyLlamaPackage",
+    targets: [
+        .executableTarget(
+            name: "MyLlamaPackage",
+            dependencies: [
+                "LlamaFramework"
+            ]),
+        .binaryTarget(
+            name: "LlamaFramework",
+            url: "https://github.com/ggml-org/llama.cpp/releases/download/b5046/llama-b5046-xcframework.zip",
+            checksum: "c19be78b5f00d8d29a25da41042cb7afa094cbf6280a225abe614b03b20029ab"
+        )
+    ]
+)
+```
+The above example is using an intermediate build `b5046` of the library. This can be modified
+to use a different version by changing the URL and checksum.
+
 ## Completions
 Command-line completion is available for some environments.
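Note: when pinning a different release in the `binaryTarget`, the checksum must match the new archive. A hedged sketch of computing it locally with Swift Package Manager after downloading the zip:

```sh
# Prints the SHA256 checksum in the format Package.swift expects.
swift package compute-checksum llama-b5046-xcframework.zip
```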

ci/run.sh

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,8 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
     export ONEAPI_DEVICE_SELECTOR="level_zero:0"
     # Enable sysman for correct memory reporting
     export ZES_ENABLE_SYSMAN=1
+    # to circumvent precision issues on CPY operations
+    export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
 fi
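Note: this makes the CI script set the option for every SYCL run. A hedged sketch of reproducing the same environment locally (the two positional arguments, assumed here, are the output and mount directories the script expects; run from the repository root with oneAPI already sourced):

```sh
export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"
GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```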

common/minja/minja.hpp

Lines changed: 12 additions & 8 deletions
@@ -2606,14 +2606,18 @@ inline std::shared_ptr<Context> Context::builtins() {
     auto & text = args.at("text");
     return text.is_null() ? text : Value(strip(text.get<std::string>()));
   }));
-  globals.set("lower", simple_function("lower", { "text" }, [](const std::shared_ptr<Context> &, Value & args) {
-    auto text = args.at("text");
-    if (text.is_null()) return text;
-    std::string res;
-    auto str = text.get<std::string>();
-    std::transform(str.begin(), str.end(), std::back_inserter(res), ::tolower);
-    return Value(res);
-  }));
+  auto char_transform_function = [](const std::string & name, const std::function<char(char)> & fn) {
+    return simple_function(name, { "text" }, [=](const std::shared_ptr<Context> &, Value & args) {
+      auto text = args.at("text");
+      if (text.is_null()) return text;
+      std::string res;
+      auto str = text.get<std::string>();
+      std::transform(str.begin(), str.end(), std::back_inserter(res), fn);
+      return Value(res);
+    });
+  };
+  globals.set("lower", char_transform_function("lower", ::tolower));
+  globals.set("upper", char_transform_function("upper", ::toupper));
   globals.set("default", Value::callable([=](const std::shared_ptr<Context> &, ArgumentsValue & args) {
     args.expectArgs("default", {2, 3}, {0, 1});
     auto & value = args.args[0];
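Note: this change deduplicates the hand-written `lower` builtin into a small factory, `char_transform_function`, which wraps any per-character function (here `::tolower` and `::toupper`) in a null-safe string transform, and registers `upper` as a new template global. In a chat template, an expression such as `{{ upper(name) }}` would then render "bob" as "BOB", to give a hypothetical example.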

docs/backend/OPENCL.md

Lines changed: 7 additions & 3 deletions
@@ -145,8 +145,13 @@ A Snapdragon X Elite device with Windows 11 Arm64 is used. Make sure the followi
 * Clang 19
 * Ninja
 * Visual Studio 2022
+* Powershell 7

-Powershell is used for the following instructions.
+Visual Studio provides necessary headers and libraries although it is not directly used for building.
+Alternatively, Visual Studio Build Tools can be installed instead of the full Visual Studio.
+
+Powershell 7 is used for the following commands.
+If an older version of Powershell is used, these commands may not work as they are.

 ### I. Setup Environment

@@ -196,10 +201,9 @@

 ## Known Issues

-- Qwen2.5 0.5B model produces gibberish output with Adreno kernels.
+- Currently OpenCL backend does not work on Adreno 6xx GPUs.

 ## TODO

-- Fix Qwen2.5 0.5B
 - Optimization for Q6_K
 - Support and optimization for Q4_K
docs/backend/SYCL.md

Lines changed: 89 additions & 5 deletions
@@ -302,6 +302,10 @@ cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -
 cmake --build build --config Release -j -v
 ```

+It is possible to come across some precision issues when running tests that stem from using faster
+instructions, which can be circumvented by setting the environment variable `SYCL_PROGRAM_COMPILE_OPTIONS`
+as `-cl-fp32-correctly-rounded-divide-sqrt`
+
 #### Nvidia GPU

 The SYCL backend depends on [oneMath](https://github.com/uxlfoundation/oneMath) for Nvidia and AMD devices.
@@ -322,6 +326,9 @@ cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=
 cmake --build build --config Release -j -v
 ```

+It is possible to come across some precision issues when running tests that stem from using faster
+instructions, which can be circumvented by passing the `-fno-fast-math` flag to the compiler.
+
 #### AMD GPU

 The SYCL backend depends on [oneMath](https://github.com/uxlfoundation/oneMath) for Nvidia and AMD devices.
@@ -468,6 +475,12 @@ b. Enable oneAPI running environment:
 "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64
 ```

+- if you are using Powershell, enable the runtime environment with the following:
+
+```
+cmd.exe "/K" '"C:\Program Files (x86)\Intel\oneAPI\setvars.bat" && powershell'
+```
+
 c. Verify installation

 In the oneAPI command line, run the following to print the available SYCL devices:
@@ -498,13 +511,13 @@ You could download the release package for Windows directly, which including bin

 Choose one of following methods to build from source code.

-1. Script
+#### 1. Script

 ```sh
 .\examples\sycl\win-build-sycl.bat
 ```

-2. CMake
+#### 2. CMake

 On the oneAPI command line window, step into the llama.cpp main directory and run the following:

@@ -533,13 +546,84 @@ cmake --preset x64-windows-sycl-debug
 cmake --build build-x64-windows-sycl-debug -j --target llama-cli
 ```

-3. Visual Studio
+#### 3. Visual Studio
+
+You have two options to use Visual Studio to build llama.cpp:
+- As CMake Project using CMake presets.
+- Creating a Visual Studio solution to handle the project.
+
+**Note**:
+
+All following commands are executed in PowerShell.

-You can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
+##### - Open as a CMake Project
+
+You can use Visual Studio to open the `llama.cpp` folder directly as a CMake project. Before compiling, select one of the SYCL CMake presets:
+
+- `x64-windows-sycl-release`
+
+- `x64-windows-sycl-debug`

 *Notes:*
+- For a minimal experimental setup, you can build only the inference executable using:
+
+```Powershell
+cmake --build build --config Release -j --target llama-cli
+```
+
+##### - Generating a Visual Studio Solution
+
+You can use Visual Studio solution to build and work on llama.cpp on Windows. You need to convert the CMake Project into a `.sln` file.
+
+If you want to use the Intel C++ Compiler for the entire `llama.cpp` project, run the following command:
+
+```Powershell
+cmake -B build -G "Visual Studio 17 2022" -T "Intel C++ Compiler 2025" -A x64 -DGGML_SYCL=ON -DCMAKE_BUILD_TYPE=Release
+```
+
+If you prefer to use the Intel C++ Compiler only for `ggml-sycl`, ensure that `ggml` and its backend libraries are built as shared libraries ( i.e. `-DBUILD_SHARED_LIBRARIES=ON`, this is default behaviour):
+
+```Powershell
+cmake -B build -G "Visual Studio 17 2022" -A x64 -DGGML_SYCL=ON -DCMAKE_BUILD_TYPE=Release \
+  -DSYCL_INCLUDE_DIR="C:\Program Files (x86)\Intel\oneAPI\compiler\latest\include" \
+  -DSYCL_LIBRARY_DIR="C:\Program Files (x86)\Intel\oneAPI\compiler\latest\lib"
+```
+
+If successful the build files have been written to: *path/to/llama.cpp/build*
+Open the project file **build/llama.cpp.sln** with Visual Studio.
+
+Once the Visual Studio solution is created, follow these steps:
+
+1. Open the solution in Visual Studio.
+
+2. Right-click on `ggml-sycl` and select **Properties**.
+
+3. In the left column, expand **C/C++** and select **DPC++**.
+
+4. In the right panel, find **Enable SYCL Offload** and set it to `Yes`.
+
+5. Apply the changes and save.
+
+*Navigation Path:*
+
+```
+Properties -> C/C++ -> DPC++ -> Enable SYCL Offload (Yes)
+```
+
+Now, you can build `llama.cpp` with the SYCL backend as a Visual Studio project.
+To do it from menu: `Build -> Build Solution`.
+Once it is completed, final results will be in **build/Release/bin**
+
+*Additional Note*
+
+- You can avoid specifying `SYCL_INCLUDE_DIR` and `SYCL_LIBRARY_DIR` in the CMake command by setting the environment variables:
+
+  - `SYCL_INCLUDE_DIR_HINT`
+
+  - `SYCL_LIBRARY_DIR_HINT`

-- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`.
+- Above instruction has been tested with Visual Studio 17 Community edition and oneAPI 2025.0. We expect them to work also with future version if the instructions are adapted accordingly.

 ### III. Run the inference

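Note on the `-fno-fast-math` advice above: the doc names the flag but not where to put it. A hedged sketch of one way to pass it through standard CMake flags when configuring the Nvidia SYCL build (flag placement is an assumption, not from this commit):

```sh
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA \
      -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
      -DCMAKE_CXX_FLAGS="-fno-fast-math"
cmake --build build --config Release -j -v
```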