Skip to content

Commit c38980a

Browse files
committed
Merge branch 'master' into esocrok
2 parents c9f5c25 + d72f5f7 commit c38980a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+5261
-357
lines changed

.github/workflows/build-amd.yml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
name: CI (AMD)
2+
3+
on:
4+
workflow_dispatch: # allows manual triggering
5+
push:
6+
branches:
7+
- master
8+
paths: [
9+
'.github/workflows/build-amd.yml',
10+
'**/CMakeLists.txt',
11+
'**/*.cmake', # any CMake module or toolchain file (was '**/.cmake', which only matches files literally named ".cmake")
12+
'**/*.h',
13+
'**/*.hpp',
14+
'**/*.c',
15+
'**/*.cpp',
16+
'**/*.cu',
17+
'**/*.cuh',
18+
'**/*.comp'
19+
]
20+
21+
concurrency:
22+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
23+
cancel-in-progress: true
24+
25+
jobs:
26+
ggml-ci-x64-amd-vulkan:
27+
runs-on: [self-hosted, Linux, X64, AMD]
28+
29+
steps:
30+
- name: Clone
31+
id: checkout
32+
uses: actions/checkout@v4
33+
34+
- name: Test
35+
id: ggml-ci
36+
run: |
37+
vulkaninfo --summary
38+
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
39+
40+
ggml-ci-x64-amd-rocm:
41+
runs-on: [self-hosted, Linux, X64, AMD]
42+
43+
steps:
44+
- name: Clone
45+
id: checkout
46+
uses: actions/checkout@v4
47+
48+
- name: Test
49+
id: ggml-ci
50+
run: |
51+
amd-smi static
52+
GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
set(CMAKE_SYSTEM_NAME Linux)
2+
set(CMAKE_SYSTEM_PROCESSOR riscv64)
3+
set(CMAKE_SYSTEM_VERSION 1)
4+
5+
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
6+
message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
7+
else()
8+
set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
9+
if (DEFINED ENV{RISCV_ROOT_PATH})
10+
file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
11+
else()
12+
message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
13+
endif()
14+
15+
set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
16+
set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
17+
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
18+
set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
19+
set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
20+
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
21+
endif()
22+
23+
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
24+
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
25+
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
26+
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
27+
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
28+
# Prepend the target ISA/ABI flags while keeping any C++ flags already set by the user
# (mirrors the CMAKE_C_FLAGS line; the previous ${CXX_FLAGS} expanded to nothing).
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_CXX_FLAGS}")
29+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")

common/chat.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1616,17 +1616,36 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
16161616
);
16171617
});
16181618

1619-
auto recipient_in_role = builder.add_rule("recipient_in_role",
1620-
"\"<|start|>assistant\"? \" to=functions.\" ( " +
1621-
string_join(tool_rules_recipient_in_role, " | ") + " )"
1622-
);
1623-
16241619
auto recipient_in_channel = builder.add_rule("recipient_in_channel",
16251620
channel + " \" to=functions.\" ( " +
16261621
string_join(tool_rules_recipient_in_channel, " | ") + " )"
16271622
);
16281623

1629-
builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
1624+
if (data.grammar_lazy) {
1625+
auto recipient_in_role = builder.add_rule("recipient_in_role",
1626+
"\"<|start|>assistant\"? \" to=functions.\" ( " +
1627+
string_join(tool_rules_recipient_in_role, " | ") + " )"
1628+
);
1629+
1630+
builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
1631+
} else {
1632+
auto not_end = builder.add_rule("not-end",
1633+
"[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
1634+
auto analysis = builder.add_rule("analysis",
1635+
"\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1636+
auto commentary = builder.add_rule("commentary",
1637+
"\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
1638+
1639+
auto recipient_in_role = builder.add_rule("recipient_in_role",
1640+
"\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
1641+
);
1642+
1643+
builder.add_rule("root",
1644+
"( " + analysis + " \"<|start|>assistant\" )? " +
1645+
"( " + commentary + " \"<|start|>assistant\" )? " +
1646+
"( " + recipient_in_role + " | " + recipient_in_channel + " )"
1647+
);
1648+
}
16301649

16311650
// Trigger on tool calls that appear in the commentary channel
16321651
data.grammar_triggers.push_back({

docs/build-riscv64-spacemit.md

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
> [!IMPORTANT]
2+
> This build documentation is specific to RISC-V SpacemiT SoCs.
3+
4+
## Build llama.cpp locally (for riscv64)
5+
6+
1. Prepare Toolchain For RISCV
7+
~~~
8+
wget https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz
9+
~~~
10+
11+
2. Build
12+
The build script below relies on RISC-V vector instructions for acceleration. Ensure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The currently supported optimization version is `RISCV64_SPACEMIT_IME1`, selected via the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler configurations are defined in the `riscv64-spacemit-linux-gnu-gcc.cmake` file. Make sure the RISC-V compiler is installed and point the build at it with `export RISCV_ROOT_PATH={your_compiler_path}`.
13+
```bash
14+
15+
cmake -B build \
16+
-DCMAKE_BUILD_TYPE=Release \
17+
-DGGML_CPU_RISCV64_SPACEMIT=ON \
18+
-DLLAMA_CURL=OFF \
19+
-DGGML_RVV=ON \
20+
-DGGML_RV_ZFH=ON \
21+
-DGGML_RV_ZICBOP=ON \
22+
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
23+
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
24+
-DCMAKE_INSTALL_PREFIX=build/installed
25+
26+
cmake --build build --parallel $(nproc) --config Release
27+
28+
pushd build
29+
make install
30+
popd
31+
```
32+
33+
## Simulation
34+
You can use QEMU to perform emulation on non-RISC-V architectures.
35+
36+
1. Download QEMU
37+
~~~
38+
wget https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v0.0.14.tar.gz
39+
~~~
40+
41+
2. Run Simulation
42+
After building llama.cpp, you can run the executable via QEMU for simulation, for example:
43+
~~~
44+
export QEMU_ROOT_PATH={your QEMU file path}
45+
export RISCV_ROOT_PATH_IME1={your RISC-V compiler path}
46+
47+
${QEMU_ROOT_PATH}/bin/qemu-riscv64 -L ${RISCV_ROOT_PATH_IME1}/sysroot -cpu max,vlen=256,elen=64,vext_spec=v1.0 ${PWD}/build/bin/llama-cli -m ${PWD}/models/Qwen2.5-0.5B-Instruct-Q4_0.gguf -t 1
48+
~~~
49+
## Performance
50+
#### Quantization Support For Matrix
51+
~~~
52+
model name : Spacemit(R) X60
53+
isa : rv64imafdcv_zicbom_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zfhmin_zca_zcd_zba_zbb_zbc_zbs_zkt_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt
54+
mmu : sv39
55+
uarch : spacemit,x60
56+
mvendorid : 0x710
57+
marchid : 0x8000000058000001
58+
~~~
59+
60+
Q4_0
61+
| Model | Size | Params | backend | threads | test | t/s |
62+
| -----------| -------- | ------ | ------- | ------- | ---- |------|
63+
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | pp512|64.12 ± 0.26|
64+
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | tg128|10.03 ± 0.01|
65+
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | pp512|24.16 ± 0.02|
66+
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | tg128|3.83 ± 0.06|
67+
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | pp512|12.08 ± 0.02|
68+
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | tg128|2.23 ± 0.02|
69+
70+
Q4_1
71+
| Model | Size | Params | backend | threads | test | t/s |
72+
| -----------| -------- | ------ | ------- | ------- | ---- |------|
73+
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | pp512|62.07 ± 0.12|
74+
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | tg128|9.91 ± 0.01|
75+
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | pp512|22.95 ± 0.25|
76+
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | tg128|4.01 ± 0.15|
77+
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | pp512|11.55 ± 0.16|
78+
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | tg128|2.25 ± 0.04|
79+
80+
81+
Q4_K
82+
| Model | Size | Params | backend | threads | test | t/s |
83+
| -----------| -------- | ------ | ------- | ------- | ---- |------|
84+
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | pp512|9.29 ± 0.05|
85+
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | tg128|5.67 ± 0.04|
86+
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | pp512|10.38 ± 0.10|
87+
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | tg128|3.17 ± 0.08|
88+
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | pp512|4.23 ± 0.04|
89+
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | tg128|1.73 ± 0.00|

ggml/src/ggml-backend-reg.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,10 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
135135
return p;
136136
}
137137

138+
// Counterpart of the dlerror()-based dl_error() in the #else branch below.
// This variant has no error text to offer and always returns an empty string,
// so callers that log it with "%s" print nothing after the colon.
static const char * dl_error() {
139+
return "";
140+
}
141+
138142
#else
139143

140144
using dl_handle = void;
@@ -155,6 +159,11 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
155159
return dlsym(handle, name);
156160
}
157161

162+
// Returns the most recent dynamic-loader error text as reported by dlerror(),
// or an empty string when dlerror() yields a null pointer, so the result can
// always be passed to a "%s" format argument.
static const char * dl_error() {
163+
const char *rslt = dlerror();
164+
return rslt != nullptr ? rslt : "";
165+
}
166+
158167
#endif
159168

160169
using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
@@ -240,7 +249,7 @@ struct ggml_backend_registry {
240249
dl_handle_ptr handle { dl_load_library(path) };
241250
if (!handle) {
242251
if (!silent) {
243-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
252+
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
244253
}
245254
return nullptr;
246255
}
@@ -531,7 +540,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
531540
if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
532541
dl_handle_ptr handle { dl_load_library(entry) };
533542
if (!handle && !silent) {
534-
GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
543+
GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
535544
}
536545
if (handle) {
537546
auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
# include "kleidiai/kleidiai.h"
1919
#endif
2020

21+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
22+
# include "spacemit/ime.h"
23+
#endif
24+
2125
#if defined(_WIN32)
2226
# define WIN32_LEAN_AND_MEAN
2327
# ifndef NOMINMAX
@@ -45,6 +49,12 @@ std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_type
4549
// }
4650
// #endif
4751

52+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
53+
if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
54+
bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
55+
}
56+
#endif
57+
4858
#ifdef GGML_USE_CPU_KLEIDIAI
4959
if (ggml_backend_cpu_kleidiai_buffer_type()) {
5060
bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());

0 commit comments

Comments
 (0)