apicalshark
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/build.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md‎
Lines changed: 2 additions & 0 deletions b/‎README.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎ci/README.md‎
Lines changed: 39 additions & 0 deletions b/‎ci/README.md‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎ci/run.sh‎
Lines changed: 24 additions & 6 deletions b/‎ci/run.sh‎
Lines changed: 24 additions & 6 deletions
diff --git a/‎common/CMakeLists.txt‎
Lines changed: 2 additions & 2 deletions b/‎common/CMakeLists.txt‎
Lines changed: 2 additions & 2 deletions
@@ -803,7 +803,7 @@ jobs:
     env:
       OPENBLAS_VERSION: 0.3.23
       SDE_VERSION: 9.33.0-2024-01-07
-      VULKAN_VERSION: 1.4.304.1
+      VULKAN_VERSION: 1.4.309.0
 
     strategy:
       matrix:
 
@@ -112,6 +112,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 - [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM)
 - [x] [QRWKV-6](https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1)
 - [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct)
+- [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview)
+- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
 
 #### Multimodal
 
 
@@ -26,4 +26,43 @@ GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 # with SYCL support
 source /opt/intel/oneapi/setvars.sh
 GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with MUSA support
+GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
+
+## Running MUSA CI in a Docker Container
+
+Assuming `$PWD` is the root of the `llama.cpp` repository, follow these steps to set up and run MUSA CI in a Docker container:
+
+### 1. Create a local directory to store cached models, configuration files and venv:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-cache
+```
+
+### 2. Create a local directory to store CI run results:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-results
+```
+
+### 3. Start a Docker container and run the CI:
+
+```bash
+docker run --privileged -it \
+    -v $HOME/llama.cpp/ci-cache:/ci-cache \
+    -v $HOME/llama.cpp/ci-results:/ci-results \
+    -v $PWD:/ws -w /ws \
+    mthreads/musa:rc3.1.1-devel-ubuntu22.04
 ```
+
+Inside the container, execute the following commands:
+
+```bash
+apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
+git config --global --add safe.directory /ws
+GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
+```
+
+This setup ensures that the CI runs within an isolated Docker environment while maintaining cached files and results across runs.
@@ -16,6 +16,9 @@
 # # with VULKAN support
 # GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 #
+# # with MUSA support
+# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
 
 if [ -z "$2" ]; then
     echo "usage: $0 <output-dir> <mnt-dir>"
@@ -52,13 +55,22 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
         echo "source /opt/intel/oneapi/setvars.sh"
         exit 1
     fi
-
+    # Use only main GPU
+    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
+    # Enable sysman for correct memory reporting
+    export ZES_ENABLE_SYSMAN=1
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
 fi
 
 if [ ! -z ${GG_BUILD_VULKAN} ]; then
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
 fi
+
+if [ ! -z ${GG_BUILD_MUSA} ]; then
+    # Use qy1 by default (MTT S80)
+    MUSA_ARCH=${MUSA_ARCH:-21}
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
+fi
 ## helpers
 
 # download a file if it does not exist or if it is outdated
@@ -808,7 +820,7 @@ export LLAMA_LOG_PREFIX=1
 export LLAMA_LOG_TIMESTAMPS=1
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
-    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
+    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models
     rm -rf ${SRC}/models-mnt
     mnt_models=${MNT}/models
     mkdir -p ${mnt_models}
@@ -826,16 +838,20 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
 fi
 
 ret=0
-
-test $ret -eq 0 && gg_run ctest_debug
+if [ -z ${GG_BUILD_SYCL} ]; then
+    # SYCL build breaks with debug build flags
+    test $ret -eq 0 && gg_run ctest_debug
+fi
 test $ret -eq 0 && gg_run ctest_release
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
     test $ret -eq 0 && gg_run embd_bge_small
     test $ret -eq 0 && gg_run rerank_tiny
 
     if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
-        test $ret -eq 0 && gg_run test_scripts_debug
+        if [ -z ${GG_BUILD_SYCL} ]; then
+            test $ret -eq 0 && gg_run test_scripts_debug
+        fi
         test $ret -eq 0 && gg_run test_scripts_release
     fi
 
@@ -846,7 +862,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
             test $ret -eq 0 && gg_run pythia_2_8b
             #test $ret -eq 0 && gg_run open_llama_7b_v2
         fi
-        test $ret -eq 0 && gg_run ctest_with_model_debug
+        if [ -z ${GG_BUILD_SYCL} ]; then
+            test $ret -eq 0 && gg_run ctest_with_model_debug
+        fi
         test $ret -eq 0 && gg_run ctest_with_model_release
     fi
 fi
 
@@ -114,8 +114,8 @@ if (LLAMA_LLGUIDANCE)
 
     ExternalProject_Add(llguidance_ext
         GIT_REPOSITORY https://github.com/guidance-ai/llguidance
-        # v0.6.12:
-        GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09
+        # v0.7.10:
+        GIT_TAG 0309d2a6bf40abda35344a362edc71e06d5009f8
         PREFIX ${CMAKE_BINARY_DIR}/llguidance
         SOURCE_DIR ${LLGUIDANCE_SRC}
         BUILD_IN_SOURCE TRUE