Skip to content

Commit 6f8d979

Browse files
co-sevenalex-spacemit
authored andcommitted
ggml: add spacemit backend
Change-Id: I249bdc043485d815a9c351867137bc1e27cc2e23
1 parent e71d48e commit 6f8d979

File tree

11 files changed

+4952
-4
lines changed

11 files changed

+4952
-4
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) 2023 SpacemiT. All rights reserved.
2+
set(CMAKE_SYSTEM_NAME Linux)
3+
SET(CMAKE_SYSTEM_PROCESSOR riscv64)
4+
set(CMAKE_SYSTEM_VERSION 1)
5+
6+
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
7+
message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
8+
else()
9+
set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
10+
if(DEFINED ENV{RISCV_ROOT_PATH})
11+
file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
12+
else()
13+
message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
14+
endif()
15+
16+
set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
17+
set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
18+
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
19+
set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
20+
set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
21+
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
22+
endif()
23+
24+
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
25+
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
26+
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
27+
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
28+
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
29+
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CXX_FLAGS}")
30+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")
31+
add_definitions(-D__fp16=_Float16)

docs/build-riscv64-spacemit.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
> [!IMPORTANT]
2+
> This build documentation is specific only to RISC-V SpacemiT SOCs.
3+
4+
## Build llama.cpp locally (for riscv64)
5+
6+
1. Prepare Toolchain For RISCV
7+
~~~
8+
wget https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz
9+
~~~
10+
11+
2. Build
12+
Below is the build script: it requires utilizing RISC-V vector instructions for acceleration. Ensure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The currently supported optimization version is `RISCV64_SPACEMIT_IME1`, corresponding to the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler configurations are defined in the `riscv64-spacemit-linux-gnu-gcc.cmake` file. Please ensure you have installed the RISC-V compiler and set the environment variable via `export RISCV_ROOT_PATH={your_compiler_path}`.
13+
```bash
14+
15+
cmake -B build-riscv64-spacemit \
16+
-DCMAKE_BUILD_TYPE=Release \
17+
-DGGML_CPU_RISCV64_SPACEMIT=ON \
18+
-DLLAMA_CURL=OFF \
19+
-DGGML_RV_ZFH=ON \
20+
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
21+
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
22+
-DCMAKE_INSTALL_PREFIX=build-riscv64-spacemit/installed
23+
24+
cmake --build build-riscv64-spacemit --parallel $(nproc) --config Release
25+
26+
pushd build-riscv64-spacemit
27+
make install
28+
popd
29+
```
30+
31+
## Simulation
32+
You can use QEMU to perform emulation on non-RISC-V architectures.
33+
34+
1. Download QEMU
35+
~~~
36+
wget https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v0.0.14.tar.gz
37+
~~~
38+
39+
2. Run Simulation
40+
After build your llama.cpp, you can run the executable file via QEMU for simulation, for example:
41+
~~~
42+
export QEMU_ROOT_PATH={your QEMU file path}
43+
export RISCV_ROOT_PATH_IME1={your RISC-V compiler path}
44+
45+
${QEMU_ROOT_PATH}/bin/qemu-riscv64 -L ${RISCV_ROOT_PATH_IME1}/sysroot -cpu max,vlen=256,elen=64,vext_spec=v1.0 ${PWD}/build-riscv64-spacemit/bin/llama-cli -m ${PWD}/models/Qwen2.5-0.5B-Instruct-Q4_0.gguf -t 1
46+
~~~
47+
## Performance
48+
#### Quantization Support For Matrix
49+
~~~
50+
model name : Spacemit(R) X60
51+
isa : rv64imafdcv_zicbom_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zfhmin_zca_zcd_zba_zbb_zbc_zbs_zkt_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt
52+
mmu : sv39
53+
uarch : spacemit,x60
54+
mvendorid : 0x710
55+
marchid : 0x8000000058000001
56+
~~~
57+
58+
Q4_0
59+
| Model | Size | Params | backend | threads | test | t/s |
60+
| -----------| -------- | ------ | ------- | ------- | ---- |------|
61+
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | pp512|64.12 ± 0.26|
62+
Qwen2.5 0.5B |403.20 MiB|630.17 M| cpu | 4 | tg128|10.03 ± 0.01|
63+
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | pp512|24.16 ± 0.02|
64+
Qwen2.5 1.5B |1011.16 MiB| 1.78 B | cpu | 4 | tg128|3.83 ± 0.06|
65+
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | pp512|12.08 ± 0.02|
66+
Qwen2.5 3B | 1.86 GiB | 3.40 B | cpu | 4 | tg128|2.23 ± 0.02|
67+
68+
Q4_1
69+
| Model | Size | Params | backend | threads | test | t/s |
70+
| -----------| -------- | ------ | ------- | ------- | ---- |------|
71+
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | pp512|62.07 ± 0.12|
72+
Qwen2.5 0.5B |351.50 MiB|494.03 M| cpu | 4 | tg128|9.91 ± 0.01|
73+
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | pp512|22.95 ± 0.25|
74+
Qwen2.5 1.5B |964.06 MiB| 1.54 B | cpu | 4 | tg128|4.01 ± 0.15|
75+
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | pp512|11.55 ± 0.16|
76+
Qwen2.5 3B | 1.85 GiB | 3.09 B | cpu | 4 | tg128|2.25 ± 0.04|
77+
78+
79+
Q4_K
80+
| Model | Size | Params | backend | threads | test | t/s |
81+
| -----------| -------- | ------ | ------- | ------- | ---- |------|
82+
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | pp512|9.29 ± 0.05|
83+
Qwen2.5 0.5B |462.96 MiB|630.17 M| cpu | 4 | tg128|5.67 ± 0.04|
84+
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | pp512|10.38 ± 0.10|
85+
Qwen2.5 1.5B | 1.04 GiB | 1.78 B | cpu | 4 | tg128|3.17 ± 0.08|
86+
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | pp512|4.23 ± 0.04|
87+
Qwen2.5 3B | 1.95 GiB | 3.40 B | cpu | 4 | tg128|1.73 ± 0.00|

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
433433
ggml-cpu/arch/riscv/quants.c
434434
ggml-cpu/arch/riscv/repack.cpp
435435
)
436-
if (GGML_RVV)
436+
if (GGML_CPU_RISCV64_SPACEMIT)
437+
list(APPEND ARCH_FLAGS -march=rv64gcv_zfh_zba_zicbop -mabi=lp64d -DGGML_RV_ZFH -D${RISCV64_SPACEMIT_IME_SPEC})
438+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_RISCV64_SPACEMIT)
439+
list(APPEND GGML_CPU_SOURCES
440+
ggml-cpu/spacemit/ggml_spacemit_ime.cpp
441+
ggml-cpu/spacemit/ggml_spacemit_ime.h
442+
ggml-cpu/spacemit/ggml_spacemit_ime_kernels.cpp
443+
ggml-cpu/spacemit/ggml_spacemit_ime_kernels.h
444+
)
445+
elseif (GGML_RVV)
437446
if (GGML_XTHEADVECTOR)
438447
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
439448
elseif (GGML_RV_ZFH)

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3209,6 +3209,26 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
32093209
uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
32103210
vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
32113211
}
3212+
#elif defined(__riscv) && defined(__riscv_v) && defined(__riscv_zfh)
3213+
int64_t n_loop = n;
3214+
__asm__ volatile(
3215+
"LOOP%=: \n\t"
3216+
"vsetvli t0, %[n], e32, m4,tu,mu \n\t"
3217+
"slli t1, t0, 1 \n\t"
3218+
"slli t2, t0, 2 \n\t"
3219+
"vle32.v v0, (%[IN]) \n\t"
3220+
"add %[IN], %[IN], t2 \n\t"
3221+
"vsetvli t0, %[n], e16, m2,tu,mu \n\t"
3222+
"vfncvt.f.f.w v4, v0 \n\t"
3223+
"vse16.v v4, (%[DST]) \n\t"
3224+
"add %[DST], %[DST], t1 \n\t"
3225+
"sub %[n], %[n], t0 \n\t"
3226+
"bnez %[n], LOOP%= \n\t"
3227+
3228+
: [ IN ] "+r"(x), [ DST ] "+r"(y), [ n ] "+r"(n_loop)
3229+
:
3230+
: "cc", "t0", "t1", "t2");
3231+
i += n;
32123232
#endif
32133233
for (; i < n; ++i) {
32143234
y[i] = GGML_CPU_FP32_TO_FP16(x[i]);
@@ -3250,6 +3270,26 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
32503270
float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0);
32513271
vec_xst(v_yh, 0, (float *)(y + i));
32523272
}
3273+
#elif defined(__riscv) && defined(__riscv_v) && defined(__riscv_zfh)
3274+
int64_t n_loop = n;
3275+
__asm__ volatile(
3276+
"LOOP%=: \n\t"
3277+
"vsetvli t0, %[n], e16, m2,tu,mu \n\t"
3278+
"slli t1, t0, 2 \n\t"
3279+
"slli t2, t0, 1 \n\t"
3280+
"vle16.v v0, (%[IN]) \n\t"
3281+
"add %[IN], %[IN], t2 \n\t"
3282+
"vfwcvt.f.f.v v4, v0 \n\t"
3283+
"vsetvli t0, %[n], e32, m4,tu,mu \n\t"
3284+
"vse32.v v4, (%[DST]) \n\t"
3285+
"add %[DST], %[DST], t1 \n\t"
3286+
"sub %[n], %[n], t0 \n\t"
3287+
"bnez %[n], LOOP%= \n\t"
3288+
3289+
: [ IN ] "+r"(x), [ DST ] "+r"(y), [ n ] "+r"(n_loop)
3290+
:
3291+
: "cc", "t0", "t1", "t2");
3292+
i += n;
32533293
#endif
32543294

32553295
for (; i < n; ++i) {

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
# include "kleidiai/kleidiai.h"
1919
#endif
2020

21+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
22+
# include "spacemit/ggml_spacemit_ime.h"
23+
#endif
24+
2125
#if defined(_WIN32)
2226
# define WIN32_LEAN_AND_MEAN
2327
# ifndef NOMINMAX
@@ -45,6 +49,12 @@ std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_type
4549
}
4650
#endif
4751

52+
#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
53+
if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
54+
bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
55+
}
56+
#endif
57+
4858
#ifdef GGML_USE_CPU_KLEIDIAI
4959
if (ggml_backend_cpu_kleidiai_buffer_type()) {
5060
bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());

0 commit comments

Comments
 (0)