Nexesenex
diff --git a/‎.github/workflows/build-amd.yml‎
Lines changed: 52 additions & 0 deletions b/‎.github/workflows/build-amd.yml‎
Lines changed: 52 additions & 0 deletions
diff --git a/‎cmake/riscv64-spacemit-linux-gnu-gcc.cmake‎
Lines changed: 29 additions & 0 deletions b/‎cmake/riscv64-spacemit-linux-gnu-gcc.cmake‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎common/chat.cpp‎
Lines changed: 25 additions & 6 deletions b/‎common/chat.cpp‎
Lines changed: 25 additions & 6 deletions
diff --git a/‎docs/build-riscv64-spacemit.md‎
Lines changed: 89 additions & 0 deletions b/‎docs/build-riscv64-spacemit.md‎
Lines changed: 89 additions & 0 deletions
diff --git a/‎ggml/src/ggml-backend-reg.cpp‎
Lines changed: 11 additions & 2 deletions b/‎ggml/src/ggml-backend-reg.cpp‎
Lines changed: 11 additions & 2 deletions
diff --git a/‎ggml/src/ggml-cpu/ggml-cpu.cpp‎
Lines changed: 10 additions & 0 deletions b/‎ggml/src/ggml-cpu/ggml-cpu.cpp‎
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,52 @@
+name: CI (AMD)
+
+on:
+  workflow_dispatch: # allows manual triggering
+  push:
+    branches:
+      - master
+    paths: [
+      '.github/workflows/build-amd.yml',
+      '**/CMakeLists.txt',
+      '**/.cmake',
+      '**/*.h',
+      '**/*.hpp',
+      '**/*.c',
+      '**/*.cpp',
+      '**/*.cu',
+      '**/*.cuh',
+      '**/*.comp'
+    ]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  ggml-ci-x64-amd-vulkan:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+
+  ggml-ci-x64-amd-rocm:
+    runs-on: [self-hosted, Linux, X64, AMD]
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: Test
+        id: ggml-ci
+        run: |
+          amd-smi static
+          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
@@ -0,0 +1,29 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR riscv64)
+set(CMAKE_SYSTEM_VERSION 1)
+
+if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
+    message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
+else()
+    set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
+    if (DEFINED ENV{RISCV_ROOT_PATH})
+        file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
+    else()
+        message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
+    endif()
+
+    set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
+    set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
+    set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
+    set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
+    set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
+    set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
+endif()
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
+set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CXX_FLAGS}")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")
@@ -1616,17 +1616,36 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
                 );
             });
 
-            auto recipient_in_role = builder.add_rule("recipient_in_role",
-                "\"<|start|>assistant\"? \" to=functions.\" ( " +
-                string_join(tool_rules_recipient_in_role, " | ") + " )"
-            );
-
             auto recipient_in_channel = builder.add_rule("recipient_in_channel",
                 channel + " \" to=functions.\" ( " +
                 string_join(tool_rules_recipient_in_channel, " | ") + " )"
             );
 
-            builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
+            if (data.grammar_lazy) {
+                auto recipient_in_role = builder.add_rule("recipient_in_role",
+                    "\"<|start|>assistant\"? \" to=functions.\" ( " +
+                    string_join(tool_rules_recipient_in_role, " | ") + " )"
+                );
+
+                builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
+            } else {
+                auto not_end = builder.add_rule("not-end",
+                    "[^<] | \"<\" [^|] | \"<|\" [^e] | \"<|e\" [^n] | \"<|en\" [^d] | \"<|end\" [^|] | \"<|end|\" [^>]");
+                auto analysis = builder.add_rule("analysis",
+                    "\"<|channel|>analysis<|message|>\" ( " + not_end + " )* \"<|end|>\"");
+                auto commentary = builder.add_rule("commentary",
+                    "\"<|channel|>commentary<|message|>\" ( " + not_end + " )* \"<|end|>\"");
+
+                auto recipient_in_role = builder.add_rule("recipient_in_role",
+                    "\" to=functions.\" ( " + string_join(tool_rules_recipient_in_role, " | ") + " )"
+                );
+
+                builder.add_rule("root",
+                    "( " + analysis + " \"<|start|>assistant\" )? " +
+                    "( " + commentary + " \"<|start|>assistant\" )? " +
+                    "( " + recipient_in_role + " | " + recipient_in_channel + " )"
+                );
+            }
 
             // Trigger on tool calls that appear in the commentary channel
             data.grammar_triggers.push_back({
 
@@ -0,0 +1,89 @@
+> [!IMPORTANT]
+> This build documentation is specific only to RISC-V SpacemiT SOCs.
+
+## Build llama.cpp locally (for riscv64)
+
+1. Prepare Toolchain For RISCV
+~~~
+wget https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v1.1.2.tar.xz
+~~~
+
+2. Build
+Below is the build script: it requires utilizing RISC-V vector instructions for acceleration. Ensure the `GGML_CPU_RISCV64_SPACEMIT` compilation option is enabled. The currently supported optimization version is `RISCV64_SPACEMIT_IME1`, corresponding to the `RISCV64_SPACEMIT_IME_SPEC` compilation option. Compiler configurations are defined in the `riscv64-spacemit-linux-gnu-gcc.cmake` file. Please ensure you have installed the RISC-V compiler and set the environment variable via `export RISCV_ROOT_PATH={your_compiler_path}`.
+```bash
+
+cmake -B build \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DGGML_CPU_RISCV64_SPACEMIT=ON \
+    -DLLAMA_CURL=OFF \
+    -DGGML_RVV=ON \
+    -DGGML_RV_ZFH=ON \
+    -DGGML_RV_ZICBOP=ON \
+    -DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
+    -DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake \
+    -DCMAKE_INSTALL_PREFIX=build/installed
+
+cmake --build build --parallel $(nproc) --config Release
+
+pushd build
+make install
+popd
+```
+
+## Simulation
+You can use QEMU to perform emulation on non-RISC-V architectures.
+
+1. Download QEMU
+~~~
+wget https://archive.spacemit.com/spacemit-ai/qemu/jdsk-qemu-v0.0.14.tar.gz
+~~~
+
+2. Run Simulation
+After build your llama.cpp, you can run the executable file via QEMU for simulation, for example:
+~~~
+export QEMU_ROOT_PATH={your QEMU file path}
+export RISCV_ROOT_PATH_IME1={your RISC-V compiler path}
+
+${QEMU_ROOT_PATH}/bin/qemu-riscv64 -L ${RISCV_ROOT_PATH_IME1}/sysroot -cpu max,vlen=256,elen=64,vext_spec=v1.0 ${PWD}/build/bin/llama-cli -m ${PWD}/models/Qwen2.5-0.5B-Instruct-Q4_0.gguf -t 1
+~~~
+## Performance
+#### Quantization Support For Matrix
+~~~
+model name      : Spacemit(R) X60
+isa             : rv64imafdcv_zicbom_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zfhmin_zca_zcd_zba_zbb_zbc_zbs_zkt_zve32f_zve32x_zve64d_zve64f_zve64x_zvfh_zvfhmin_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt
+mmu             : sv39
+uarch           : spacemit,x60
+mvendorid       : 0x710
+marchid         : 0x8000000058000001
+~~~
+
+Q4_0
+|   Model    |   Size   | Params | backend | threads | test | t/s |
+| -----------| -------- | ------ | ------- | ------- | ---- |------|
+Qwen2.5 0.5B |403.20 MiB|630.17 M|   cpu   |    4    | pp512|64.12 ± 0.26|
+Qwen2.5 0.5B |403.20 MiB|630.17 M|   cpu   |    4    | tg128|10.03 ± 0.01|
+Qwen2.5 1.5B |1011.16 MiB| 1.78 B |   cpu   |    4    | pp512|24.16 ± 0.02|
+Qwen2.5 1.5B |1011.16 MiB| 1.78 B |   cpu   |    4    | tg128|3.83 ± 0.06|
+Qwen2.5 3B   | 1.86 GiB  | 3.40 B |   cpu   |    4    | pp512|12.08 ± 0.02|
+Qwen2.5 3B   | 1.86 GiB  | 3.40 B |   cpu   |    4    | tg128|2.23 ± 0.02|
+
+Q4_1
+|   Model    |   Size   | Params | backend | threads | test | t/s |
+| -----------| -------- | ------ | ------- | ------- | ---- |------|
+Qwen2.5 0.5B |351.50 MiB|494.03 M|   cpu   |    4    | pp512|62.07 ± 0.12|
+Qwen2.5 0.5B |351.50 MiB|494.03 M|   cpu   |    4    | tg128|9.91 ± 0.01|
+Qwen2.5 1.5B |964.06 MiB| 1.54 B |   cpu   |    4    | pp512|22.95 ± 0.25|
+Qwen2.5 1.5B |964.06 MiB| 1.54 B |   cpu   |    4    | tg128|4.01 ± 0.15|
+Qwen2.5 3B   | 1.85 GiB | 3.09 B |   cpu   |    4    | pp512|11.55 ± 0.16|
+Qwen2.5 3B   | 1.85 GiB | 3.09 B |   cpu   |    4    | tg128|2.25 ± 0.04|
+
+
+Q4_K
+|   Model    |   Size   | Params | backend | threads | test | t/s |
+| -----------| -------- | ------ | ------- | ------- | ---- |------|
+Qwen2.5 0.5B |462.96 MiB|630.17 M|   cpu   |    4    | pp512|9.29 ± 0.05|
+Qwen2.5 0.5B |462.96 MiB|630.17 M|   cpu   |    4    | tg128|5.67 ± 0.04|
+Qwen2.5 1.5B | 1.04 GiB | 1.78 B |   cpu   |    4    | pp512|10.38 ± 0.10|
+Qwen2.5 1.5B | 1.04 GiB | 1.78 B |   cpu   |    4    | tg128|3.17 ± 0.08|
+Qwen2.5 3B   | 1.95 GiB | 3.40 B |   cpu   |    4    | pp512|4.23 ± 0.04|
+Qwen2.5 3B   | 1.95 GiB | 3.40 B |   cpu   |    4    | tg128|1.73 ± 0.00|
@@ -135,6 +135,10 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
     return p;
 }
 
+static const char * dl_error() {
+    return "";
+}
+
 #else
 
 using dl_handle = void;
@@ -155,6 +159,11 @@ static void * dl_get_sym(dl_handle * handle, const char * name) {
     return dlsym(handle, name);
 }
 
+static const char * dl_error() {
+    const char *rslt = dlerror();
+    return rslt != nullptr ? rslt : "";
+}
+
 #endif
 
 using dl_handle_ptr = std::unique_ptr<dl_handle, dl_handle_deleter>;
@@ -240,7 +249,7 @@ struct ggml_backend_registry {
         dl_handle_ptr handle { dl_load_library(path) };
         if (!handle) {
             if (!silent) {
-                GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(path).c_str());
+                GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(path).c_str(), dl_error());
             }
             return nullptr;
         }
@@ -531,7 +540,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
                 if (filename.native().find(file_prefix) == 0 && ext == file_extension) {
                     dl_handle_ptr handle { dl_load_library(entry) };
                     if (!handle && !silent) {
-                        GGML_LOG_ERROR("%s: failed to load %s\n", __func__, path_str(entry.path()).c_str());
+                        GGML_LOG_ERROR("%s: failed to load %s: %s\n", __func__, path_str(entry.path()).c_str(), dl_error());
                     }
                     if (handle) {
                         auto score_fn = (ggml_backend_score_t) dl_get_sym(handle.get(), "ggml_backend_score");
 
@@ -18,6 +18,10 @@
 #    include "kleidiai/kleidiai.h"
 #endif
 
+#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
+#    include "spacemit/ime.h"
+#endif
+
 #if defined(_WIN32)
 #    define WIN32_LEAN_AND_MEAN
 #    ifndef NOMINMAX
@@ -45,6 +49,12 @@ std::vector<ggml_backend_buffer_type_t> & ggml_backend_cpu_get_extra_buffer_type
 //         }
 // #endif
 
+#ifdef GGML_USE_CPU_RISCV64_SPACEMIT
+        if (ggml_backend_cpu_riscv64_spacemit_buffer_type()) {
+            bufts.push_back(ggml_backend_cpu_riscv64_spacemit_buffer_type());
+        }
+#endif
+
 #ifdef GGML_USE_CPU_KLEIDIAI
         if (ggml_backend_cpu_kleidiai_buffer_type()) {
             bufts.push_back(ggml_backend_cpu_kleidiai_buffer_type());