7 changes: 0 additions & 7 deletions runtime-creativity/llama.cpp/autobuild/beyond
@@ -1,9 +1,2 @@
abinfo "Purging test suites ..."
rm -v "$PKGDIR"/usr/bin/test-*
abinfo "Purging GGUF converter, which depends on non-packaged Python packages ..."
rm -v "$PKGDIR"/usr/bin/convert_hf_to_gguf.py

# FIXME: Both whisper.cpp and llama.cpp have conflicting ggml files and paths, we don't need it
abinfo "Delete all ggml files ..."
rm -rv "$PKGDIR"/usr/include/
rm -rv "$PKGDIR"/usr/lib/cmake/ggml/
24 changes: 5 additions & 19 deletions runtime-creativity/llama.cpp/autobuild/defines
@@ -1,24 +1,10 @@
 PKGNAME=llama.cpp
 PKGDES="C++ implementation of LLM inference"
 PKGSEC=misc
-PKGDEP="vulkan-loader libcl gcc-runtime"
-BUILDDEP="shaderc glslang vulkan-headers opencl-registry-api"
+PKGDEP="gcc-runtime glibc ggml curl"

 ABTYPE=cmakeninja
-CMAKE_AFTER="-DGGML_BACKEND_DL=OFF \
-             -DGGML_NATIVE=OFF \
-             -DGGML_VULKAN=ON \
-             -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=OFF \
-             -DBUILD_SHARED_LIBS=OFF \
-             -DGGML_STATIC:BOOL=ON"
-
-# FIXME: Because -DGGML_BACKEND_DL=OFF so -DGGML_CPU_ALL_VARIANTS=ON, it cannot be enabled
-CMAKE_AFTER__AMD64="${CMAKE_AFTER} \
-                    -DGGML_CPU_ALL_VARIANTS=OFF"
-# FIXME: No runtime dispatch for RISC-V yet
-CMAKE_AFTER__RISCV64="${CMAKE_AFTER} \
-                      -DGGML_RVV=OFF"
-
-# FIXME: -DGGML_BACKEND_DL=ON and -DBUILD_SHARED_LIBS=OFF cannot be turned on at the same time
-# CMake Error at ggml/src/CMakeLists.txt:187 (message):
-#     GGML_BACKEND_DL requires BUILD_SHARED_LIBS
+CMAKE_AFTER=(
+    '-DLLAMA_BUILD_TESTS=OFF'
+    '-DLLAMA_USE_SYSTEM_GGML=ON'
+)

This file was deleted.

@@ -0,0 +1,48 @@
From 45a57d3ee411a0d95a79a25b306214e0cd6419f9 Mon Sep 17 00:00:00 2001
From: Qing Yun <[email protected]>
Date: Sun, 28 Sep 2025 21:35:03 +0800
Subject: [PATCH] AOSCOS: disable FlashAttention on loongarch64

- After PR15434 in ggml-org/llama.cpp, llama.cpp uses FlashAttention by default,
  but this option is broken on loongarch with LSX or LASX.
Link: https://github.com/ggml-org/llama.cpp/pull/15434
Link: https://github.com/ggml-org/llama.cpp/issues/15854
---
common/common.h | 4 ++++
src/llama-context.cpp | 4 ++++
2 files changed, 8 insertions(+)

diff --git a/common/common.h b/common/common.h
index 40c6847f..99471467 100644
--- a/common/common.h
+++ b/common/common.h
@@ -313,7 +313,11 @@ struct common_params {
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
+#if defined(__loongarch64)
+ enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED; // whether to use Flash Attention
+#else
enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO; // whether to use Flash Attention
+#endif /*(__loongarch64)*/

struct common_params_sampling sampling;
struct common_params_speculative speculative;
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index d8a8b5e6..f18a4709 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2274,7 +2274,11 @@ llama_context_params llama_context_default_params() {
/*.rope_scaling_type =*/ LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED,
/*.pooling_type =*/ LLAMA_POOLING_TYPE_UNSPECIFIED,
/*.attention_type =*/ LLAMA_ATTENTION_TYPE_UNSPECIFIED,
+#if defined(__loongarch64)
+ /*.flash_attn_type =*/ LLAMA_FLASH_ATTN_TYPE_DISABLED,
+#else
/*.flash_attn_type =*/ LLAMA_FLASH_ATTN_TYPE_AUTO,
+#endif /*(__loongarch64)*/
/*.rope_freq_base =*/ 0.0f,
/*.rope_freq_scale =*/ 0.0f,
/*.yarn_ext_factor =*/ -1.0f,
--
2.51.0
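
Note that the patch above only flips the compile-time default; callers can still opt back in per context. A minimal sketch (assuming the public llama.h enum also provides LLAMA_FLASH_ATTN_TYPE_ENABLED; this snippet is illustrative and not part of this PR):

    // Hypothetical consumer code built against the patched llama.cpp.
    #include "llama.h"

    static void configure_context_params() {
        llama_context_params cparams = llama_context_default_params();
        // On loongarch64 builds this now starts as LLAMA_FLASH_ATTN_TYPE_DISABLED
        // instead of LLAMA_FLASH_ATTN_TYPE_AUTO (see the patch above).
        // A caller that really wants FlashAttention can still request it explicitly
        // (assumed enum value; verify against the packaged llama.h):
        cparams.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
        (void) cparams; // a real program would pass this on when creating the context
    }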
2 changes: 1 addition & 1 deletion runtime-creativity/llama.cpp/spec
@@ -1,4 +1,4 @@
-VER=6265
+VER=6615
 # The build system uses git tags to determine version number
 SRCS="git::commit=tags/b${VER};copy-repo=true::https://github.com/ggerganov/llama.cpp.git"
 CHKSUMS="SKIP"
6 changes: 3 additions & 3 deletions runtime-creativity/whisper.cpp/autobuild/defines
@@ -1,11 +1,11 @@
 PKGNAME=whisper.cpp
 PKGSEC=misc
-PKGDEP="vulkan"
-BUILDDEP="shaderc"
+PKGDEP="gcc-runtime glibc ggml"
 PKGDES="Utility and library to work with OpenAI's automatic speech recognition model"

 CMAKE_AFTER=(
-    "-DGGML_VULKAN=1"
+    '-DWHISPER_BUILD_TESTS=OFF'
+    '-DWHISPER_USE_SYSTEM_GGML=ON'
 )

 # FIXME: fix riscv64 build
1 change: 1 addition & 0 deletions runtime-creativity/whisper.cpp/spec
@@ -2,3 +2,4 @@ VER=1.7.6
 SRCS="git::commit=tags/v$VER::https://github.com/ggml-org/whisper.cpp"
 CHKSUMS="SKIP"
 CHKUPDATE="anitya::id=372280"
+REL=1
61 changes: 61 additions & 0 deletions runtime-scientific/ggml/autobuild/defines
@@ -0,0 +1,61 @@
PKGNAME=ggml
PKGSEC=libs
PKGDEP="gcc-runtime glibc vulkan-loader libcl openblas"
PKGDES="Tensor library for machine learning"
BUILDDEP="vulkan-headers glslang shaderc opencl-registry-api openblas lapack"

ABTYPE=cmakeninja
CMAKE_AFTER=(
'-DBUILD_SHARED_LIBS=ON'
'-DGGML_BACKEND_DL=ON'
'-DGGML_BACKEND_DIR=/usr/lib/ggml'
'-DGGML_NATIVE=OFF'
'-DGGML_CPU_REPACK=ON'
'-DGGML_VULKAN=ON'
'-DGGML_OPENCL=ON'
'-DGGML_OPENCL_USE_ADRENO_KERNELS=OFF'
'-DGGML_BLAS=ON'
'-DGGML_BLAS_VENDOR=OpenBLAS'
)

CMAKE_AFTER__AMD64=(
"${CMAKE_AFTER[@]}"
'-DGGML_CPU_ALL_VARIANTS=ON'
)

CMAKE_AFTER__ARM64=(
"${CMAKE_AFTER[@]}"
'-DGGML_CPU_ALL_VARIANTS=ON'
)

CMAKE_AFTER__LOONGARCH64=(
"${CMAKE_AFTER[@]}"
'-DGGML_LSX=ON'
# FIXME: LSX backend is broken now, enable LASX
'-DGGML_LASX=ON'
)

CMAKE_AFTER__LOONGARCH64_NOSIMD=(
"${CMAKE_AFTER[@]}"
'-DGGML_LSX=OFF'
'-DGGML_LASX=OFF'
)

CMAKE_AFTER__RISCV64=(
"${CMAKE_AFTER[@]}"
'-DGGML_RVV=OFF'
'-DGGML_RV_ZFH=OFF'
'-DGGML_RV_ZVFH=OFF'
'-DGGML_RV_ZICBOP=OFF'
)

CMAKE_AFTER__LOONGSON3=(
"${CMAKE_AFTER[@]}"
)

CMAKE_AFTER__PPC64EL=(
"${CMAKE_AFTER[@]}"
'-DGGML_CPU_ALL_VARIANTS=ON'
)

PKGBREAK="llama.cpp<=6265 whisper.cpp<=1.7.6"
@@ -0,0 +1,104 @@
From a8f5377f6f0587284300fdc53b7cb1a9b479267b Mon Sep 17 00:00:00 2001
From: Qing Yun <[email protected]>
Date: Sun, 28 Sep 2025 06:48:16 +0800
Subject: [PATCH] ggml-cpu: fix loongarch lsx compilation error

- Backport from llama.cpp
Link: https://github.com/ggml-org/llama.cpp/pull/15864
---
src/ggml-cpu/arch/loongarch/quants.c | 24 ++++++++++++------------
src/ggml-cpu/simd-mappings.h | 16 ++++++++--------
2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/ggml-cpu/arch/loongarch/quants.c b/src/ggml-cpu/arch/loongarch/quants.c
index 0f9af7bf..22fc7607 100644
--- a/src/ggml-cpu/arch/loongarch/quants.c
+++ b/src/ggml-cpu/arch/loongarch/quants.c
@@ -105,6 +105,18 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128

return ((v4f32)res)[0];
}
+
+// multiply int8_t, add results pairwise twice
+static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
+ // Get absolute values of x vectors
+ const __m128i ax = __lsx_vsigncov_b(x, x);
+ // Sign the values of the y vectors
+ const __m128i sy = __lsx_vsigncov_b(x, y);
+ // Perform multiplication and create 16-bit values
+ const __m128i dot = lsx_maddubs_h(ax, sy);
+ const __m128i ones = __lsx_vreplgr2vr_h(1);
+ return lsx_madd_h(ones, dot);
+}
#endif

#if defined(__loongarch_asx)
@@ -323,18 +335,6 @@ static inline __m256i lasx_xvandi_b_bit(__m256i a, const unsigned int b) {
}
}

-// multiply int8_t, add results pairwise twice
-static inline __m128i mul_sum_i8_pairs(const __m128i x, const __m128i y) {
- // Get absolute values of x vectors
- const __m128i ax = __lsx_vsigncov_b(x, x);
- // Sign the values of the y vectors
- const __m128i sy = __lsx_vsigncov_b(x, y);
- // Perform multiplication and create 16-bit values
- const __m128i dot = lsx_maddubs_h(ax, sy);
- const __m128i ones = __lsx_vreplgr2vr_h(1);
- return lsx_madd_h(ones, dot);
-}
-
// horizontally add 8 floats
static inline float hsum_float_8(const __m256 x) {
__m128 res = lasx_extractf128(x, 1);
diff --git a/src/ggml-cpu/simd-mappings.h b/src/ggml-cpu/simd-mappings.h
index a84ba75c..8daec663 100644
--- a/src/ggml-cpu/simd-mappings.h
+++ b/src/ggml-cpu/simd-mappings.h
@@ -998,9 +998,9 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
#define GGML_F32_EPR 4

#define GGML_F32x4 __m128
-#define GGML_F32x4_ZERO __lsx_vldi(0)
-#define GGML_F32x4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
-#define GGML_F32x4_LOAD(x) __lsx_vld((x), 0)
+#define GGML_F32x4_ZERO (__m128)__lsx_vldi(0)
+#define GGML_F32x4_SET1(x) (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
+#define GGML_F32x4_LOAD(x) (__m128)__lsx_vld((x), 0)
#define GGML_F32x4_STORE(x, y) __lsx_vst(y, x, 0)
#define GGML_F32x4_FMA(a, b, c) __lsx_vfmadd_s(b, c, a)
#define GGML_F32x4_ADD __lsx_vfadd_s
@@ -1022,7 +1022,7 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) {
__m128i tmp = __lsx_vsrli_d((__m128i) x[0], 32); \
tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, x[0]); \
tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \
- const __m128 t0 = __lsx_vshuf4i_w(tmp, 0x88); \
+ const __m128 t0 = (__m128)__lsx_vshuf4i_w(tmp, 0x88); \
tmp = __lsx_vsrli_d((__m128i) t0, 32); \
tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, t0); \
tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \
@@ -1052,7 +1052,7 @@ static inline __m128 __lsx_f16x4_load(const ggml_fp16_t * x) {
tmp[2] = GGML_CPU_FP16_TO_FP32(x[2]);
tmp[3] = GGML_CPU_FP16_TO_FP32(x[3]);

- return __lsx_vld(tmp, 0);
+ return (__m128)__lsx_vld(tmp, 0);
}

static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
@@ -1067,9 +1067,9 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) {
}

#define GGML_F32Cx4 __m128
-#define GGML_F32Cx4_ZERO __lsx_vldi(0)
-#define GGML_F32Cx4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
-#define GGML_F32Cx4_LOAD(x) __lsx_f16x4_load(x)
+#define GGML_F32Cx4_ZERO (__m128)__lsx_vldi(0)
+#define GGML_F32Cx4_SET1(x) (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0)
+#define GGML_F32Cx4_LOAD(x) (__m128)__lsx_f16x4_load(x)
#define GGML_F32Cx4_STORE(x, y) __lsx_f16x4_store(x, y)
#define GGML_F32Cx4_FMA GGML_F32x4_FMA
#define GGML_F32Cx4_ADD __lsx_vfadd_s
--
2.51.0
4 changes: 4 additions & 0 deletions runtime-scientific/ggml/spec
@@ -0,0 +1,4 @@
VER=0.9.3
SRCS="git::commit=tags/v$VER::https://github.com/ggml-org/ggml"
CHKSUMS="SKIP"
CHKUPDATE="anitya::id=383765"