7 changes: 0 additions & 7 deletions runtime-creativity/llama.cpp/autobuild/beyond
@@ -1,9 +1,2 @@
abinfo "Purging test suites ..."
rm -v "$PKGDIR"/usr/bin/test-*
abinfo "Purging GGUF converter, which depends on non-packaged Python packages ..."
rm -v "$PKGDIR"/usr/bin/convert_hf_to_gguf.py

# FIXME: Both whisper.cpp and llama.cpp have conflicting ggml files and paths, we don't need it
abinfo "Delete all ggml files ..."
rm -rv "$PKGDIR"/usr/include/
rm -rv "$PKGDIR"/usr/lib/cmake/ggml/
24 changes: 5 additions & 19 deletions runtime-creativity/llama.cpp/autobuild/defines
@@ -1,24 +1,10 @@
 PKGNAME=llama.cpp
 PKGDES="C++ implementation of LLM inference"
 PKGSEC=misc
-PKGDEP="vulkan-loader libcl gcc-runtime"
-BUILDDEP="shaderc glslang vulkan-headers opencl-registry-api"
+PKGDEP="gcc-runtime glibc ggml curl"

 ABTYPE=cmakeninja
-CMAKE_AFTER="-DGGML_BACKEND_DL=OFF \
-             -DGGML_NATIVE=OFF \
-             -DGGML_VULKAN=ON \
-             -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=OFF \
-             -DBUILD_SHARED_LIBS=OFF \
-             -DGGML_STATIC:BOOL=ON"
-
-# FIXME: Because -DGGML_BACKEND_DL=OFF so -DGGML_CPU_ALL_VARIANTS=ON, it cannot be enabled
-CMAKE_AFTER__AMD64="${CMAKE_AFTER} \
-                    -DGGML_CPU_ALL_VARIANTS=OFF"
-# FIXME: No runtime dispatch for RISC-V yet
-CMAKE_AFTER__RISCV64="${CMAKE_AFTER} \
-                      -DGGML_RVV=OFF"
-
-# FIXME: -DGGML_BACKEND_DL=ON and -DBUILD_SHARED_LIBS=OFF cannot be turned on at the same time
-# CMake Error at ggml/src/CMakeLists.txt:187 (message):
-#     GGML_BACKEND_DL requires BUILD_SHARED_LIBS
+CMAKE_AFTER=(
+    '-DLLAMA_BUILD_TESTS=OFF'
+    '-DLLAMA_USE_SYSTEM_GGML=ON'
+)

This file was deleted.

@@ -0,0 +1,48 @@
From 45a57d3ee411a0d95a79a25b306214e0cd6419f9 Mon Sep 17 00:00:00 2001
From: Qing Yun <[email protected]>
Date: Sun, 28 Sep 2025 21:35:03 +0800
Subject: [PATCH] AOSCOS: disable FlashAttention on loongarch64

- After PR15434 in ggml-org/llama.cpp, llama.cpp uses FlashAttention by default,
  but this option is broken on loongarch with LSX or LASX.
Link: https://github.com/ggml-org/llama.cpp/pull/15434
Link: https://github.com/ggml-org/llama.cpp/issues/15854
---
common/common.h | 4 ++++
src/llama-context.cpp | 4 ++++
2 files changed, 8 insertions(+)

diff --git a/common/common.h b/common/common.h
index 40c6847f..99471467 100644
--- a/common/common.h
+++ b/common/common.h
@@ -313,7 +313,11 @@ struct common_params {
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
+#if defined(__loongarch64)
+ enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED; // whether to use Flash Attention
+#else
enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO; // whether to use Flash Attention
+#endif /*(__loongarch64)*/

struct common_params_sampling sampling;
struct common_params_speculative speculative;
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index d8a8b5e6..f18a4709 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2274,7 +2274,11 @@ llama_context_params llama_context_default_params() {
/*.rope_scaling_type =*/ LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
/*.pooling_type =*/ LLAMA_POOLING_TYPE_UNSPECIFIED,
/*.attention_type =*/ LLAMA_ATTENTION_TYPE_UNSPECIFIED,
+#if defined(__loongarch64)
+ /*.flash_attn_type =*/ LLAMA_FLASH_ATTN_TYPE_DISABLED,
+#else
/*.flash_attn_type =*/ LLAMA_FLASH_ATTN_TYPE_AUTO,
+#endif /*(__loongarch64)*/
/*.rope_freq_base =*/ 0.0f,
/*.rope_freq_scale =*/ 0.0f,
/*.yarn_ext_factor =*/ -1.0f,
--
2.51.0
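
Note that the patch above only flips the compile-time default; callers can still opt back in per context. A minimal sketch (assuming the public llama.h enum also provides LLAMA_FLASH_ATTN_TYPE_ENABLED; this snippet is illustrative and not part of this PR):

    // Hypothetical consumer code built against the patched llama.cpp.
    #include "llama.h"

    static void configure_context_params() {
        llama_context_params cparams = llama_context_default_params();
        // On loongarch64 builds this now starts as LLAMA_FLASH_ATTN_TYPE_DISABLED
        // instead of LLAMA_FLASH_ATTN_TYPE_AUTO (see the patch above).
        // A caller that really wants FlashAttention can still request it explicitly
        // (assumed enum value; verify against the packaged llama.h):
        cparams.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
        (void) cparams; // a real program would pass this on when creating the context
    }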
2 changes: 1 addition & 1 deletion runtime-creativity/llama.cpp/spec
@@ -1,4 +1,4 @@
-VER=6265
+VER=6615
 # The build system uses git tags to determine version number
 SRCS="git::commit=tags/b${VER};copy-repo=true::https://github.com/ggerganov/llama.cpp.git"
 CHKSUMS="SKIP"
6 changes: 3 additions & 3 deletions runtime-creativity/whisper.cpp/autobuild/defines
@@ -1,11 +1,11 @@
 PKGNAME=whisper.cpp
 PKGSEC=misc
-PKGDEP="vulkan"
-BUILDDEP="shaderc"
+PKGDEP="gcc-runtime glibc ggml"
 PKGDES="Utility and library to work with OpenAI's automatic speech recognition model"

 CMAKE_AFTER=(
-    "-DGGML_VULKAN=1"
+    '-DWHISPER_BUILD_TESTS=OFF'
+    '-DWHISPER_USE_SYSTEM_GGML=ON'
 )

 # FIXME: fix riscv64 build
1 change: 1 addition & 0 deletions runtime-creativity/whisper.cpp/spec
@@ -2,3 +2,4 @@ VER=1.7.6
 SRCS="git::commit=tags/v$VER::https://github.com/ggml-org/whisper.cpp"
 CHKSUMS="SKIP"
 CHKUPDATE="anitya::id=372280"
+REL=1
61 changes: 61 additions & 0 deletions runtime-scientific/ggml/autobuild/defines
@@ -0,0 +1,61 @@
PKGNAME=ggml
PKGSEC=libs
PKGDEP="gcc-runtime glibc vulkan-loader libcl openblas"
PKGDES="Tensor library for machine learning"
BUILDDEP="vulkan-headers glslang shaderc opencl-registry-api openblas lapack"

ABTYPE=cmakeninja
CMAKE_AFTER=(
'-DBUILD_SHARED_LIBS=ON'
'-DGGML_BACKEND_DL=ON'
'-DGGML_BACKEND_DIR=/usr/lib/ggml'
'-DGGML_NATIVE=OFF'
'-DGGML_CPU_REPACK=ON'
'-DGGML_VULKAN=ON'
'-DGGML_OPENCL=ON'
'-DGGML_OPENCL_USE_ADRENO_KERNELS=OFF'
'-DGGML_BLAS=ON'
'-DGGML_BLAS_VENDOR=OpenBLAS'
)

CMAKE_AFTER__AMD64=(
"${CMAKE_AFTER[@]}"
'-DGGML_CPU_ALL_VARIANTS=ON'
)

CMAKE_AFTER__ARM64=(
"${CMAKE_AFTER[@]}"
'-DGGML_CPU_ALL_VARIANTS=ON'
)

CMAKE_AFTER__LOONGARCH64=(
"${CMAKE_AFTER[@]}"
'-DGGML_LSX=ON'
# FIXME: LSX backend is broken now, enable LASX
'-DGGML_LASX=ON'
)

CMAKE_AFTER__LOONGARCH64_NOSIMD=(
"${CMAKE_AFTER[@]}"
'-DGGML_LSX=OFF'
'-DGGML_LASX=OFF'
)

CMAKE_AFTER__RISCV64=(
"${CMAKE_AFTER[@]}"
'-DGGML_RVV=OFF'
'-DGGML_RV_ZFH=OFF'
'-DGGML_RV_ZVFH=OFF'
'-DGGML_RV_ZICBOP=OFF'
)

CMAKE_AFTER__LOONGSON3=(
"${CMAKE_AFTER[@]}"
)

CMAKE_AFTER__PPC64EL=(
"${CMAKE_AFTER[@]}"
'-DGGML_CPU_ALL_VARIANTS=ON'
)

PKGBREAK="llama.cpp<=6265 whisper.cpp<=1.7.6"
@@ -0,0 +1,104 @@
From a8f5377f6f0587284300fdc53b7cb1a9b479267b Mon Sep 17 00:00:00 2001
From: Qing Yun <[email protected]>
Date: Sun, 28 Sep 2025 06:48:16 +0800
Subject: [PATCH] ggml-cpu: fix loongarch lsx compilation error

- Backport from llama.cpp
Link: https://github.com/ggml-org/llama.cpp/pull/15864
---
src/ggml-cpu/arch/loongarch/quants.c | 24 ++++++++++++------------
src/ggml-cpu/simd-mappings.h | 16 ++++++++--------
2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/ggml-cpu/arch/loongarch/quants.c b/src/ggml-cpu/arch/loongarch/quants.c
index 0f9af7bf..22fc7607 100644
--- a/src/ggml-cpu/arch/loongarch/quants.c
+++ b/src/ggml-cpu/arch/loongarch/quants.c
@@ -105,6 +105,18 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128

return ((v4f32)res)[0];
}
+
+// multiply int8_t, add results pairwise twice
+static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
+ // Get absolute values of x vectors
+ const __m128i ax = __lsx_vsigncov_b(x, x);
+ // Sign the values of the y vectors
+ const __m128i sy = __lsx_vsigncov_b(x, y);
+ // Perform multiplication and create 16-bit values
+ const __m128i dot = lsx_maddubs_h(ax, sy);
+ const __m128i ones = __lsx_vreplgr2vr_h(1);
+ return lsx_madd_h(ones, dot);
+}
#endif

#if defined(__loongarch_asx)
@@ -323,18 +335,6 @@ static inline __m256i lasx_xvandi_b_bit(__m256i a, const unsigned int b) {
}
}

-// multiply int8_t, add results pairwise twice
-static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
- // Get absolute values of x vectors
- const __m128i ax = __lsx_vsigncov_b(x, x);
- // Sign the values of the y vectors
- const __m128i sy = __lsx_vsigncov_b(x, y);
- // Perform multiplication and create 16-bit values
- const __m128i dot = lsx_maddubs_h(ax, sy);
- const __m128i ones = __lsx_vreplgr2vr_h(1);
- return lsx_madd_h(ones, dot);
-}
-
// horizontally add 8 floats
static inline float hsum_float_8(const __m256 x) {
__m128 res = lasx_extractf128(x, 1);
diff --git a/src/ggml-cpu/simd-mappings.h b/src/ggml-cpu/simd-mappings.h
index a84ba75c..8daec663 100644
--- a/src/ggml-cpu/simd-mappings.h
+++ b/src/ggml-cpu/simd-mappings.h
@@ -998,9 +998,9 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
#define GGML_F32_EPR 4

#define GGML_F32x4 __m128
-#define GGML_F32x4_ZERO __lsx_vldi(0)
-#define GGML_F32x4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
-#define GGML_F32x4_LOAD(x) __lsx_vld((x), 0)
+#define GGML_F32x4_ZERO (__m128)__lsx_vldi(0)
+#define GGML_F32x4_SET1(x) (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
+#define GGML_F32x4_LOAD(x) (__m128)__lsx_vld((x), 0)
#define GGML_F32x4_STORE(x, y) __lsx_vst(y, x, 0)
#define GGML_F32x4_FMA(a, b, c) __lsx_vfmadd_s(b, c, a)
#define GGML_F32x4_ADD __lsx_vfadd_s
@@ -1022,7 +1022,7 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
__m128i tmp = __lsx_vsrli_d((__m128i) x[0], 32); \
tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, x[0]); \
tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \
- const __m128 t0 = __lsx_vshuf4i_w(tmp, 0x88); \
+ const __m128 t0 = (__m128)__lsx_vshuf4i_w(tmp, 0x88); \
tmp = __lsx_vsrli_d((__m128i) t0, 32); \
tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, t0); \
tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \
@@ -1052,7 +1052,7 @@ static inline __m128 __lsx_f16x4_load(const ggml_fp16_t * x) {
tmp[2] = GGML_CPU_FP16_TO_FP32(x[2]);
tmp[3] = GGML_CPU_FP16_TO_FP32(x[3]);

- return __lsx_vld(tmp, 0);
+ return (__m128)__lsx_vld(tmp, 0);
}

static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
@@ -1067,9 +1067,9 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
}

#define GGML_F32Cx4 __m128
-#define GGML_F32Cx4_ZERO __lsx_vldi(0)
-#define GGML_F32Cx4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
-#define GGML_F32Cx4_LOAD(x) __lsx_f16x4_load(x)
+#define GGML_F32Cx4_ZERO (__m128)__lsx_vldi(0)
+#define GGML_F32Cx4_SET1(x) (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
+#define GGML_F32Cx4_LOAD(x) (__m128)__lsx_f16x4_load(x)
#define GGML_F32Cx4_STORE(x, y) __lsx_f16x4_store(x, y)
#define GGML_F32Cx4_FMA GGML_F32x4_FMA
#define GGML_F32Cx4_ADD __lsx_vfadd_s
--
2.51.0
4 changes: 4 additions & 0 deletions runtime-scientific/ggml/spec
@@ -0,0 +1,4 @@
VER=0.9.3
SRCS="git::commit=tags/v$VER::https://github.com/ggml-org/ggml"
CHKSUMS="SKIP"
CHKUPDATE="anitya::id=383765"