Skip to content

Commit b839192

Browse files
committed
Merge remote-tracking branch 'origin/master' into sl/thread-safety-test
2 parents 9381f4e + 7f4fbe5 commit b839192

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+24927
-17267
lines changed

.github/labeler.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,10 @@ nix:
8686
embedding:
8787
- changed-files:
8888
- any-glob-to-any-file: examples/embedding/
89+
90+
Ascend NPU:
91+
- changed-files:
92+
- any-glob-to-any-file:
93+
- ggml/include/ggml-cann.h
94+
- ggml/src/ggml-cann/**
95+
- docs/backend/CANN.md

.github/workflows/build-linux-cross.yml

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,3 +231,116 @@ jobs:
231231
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
232232
233233
cmake --build build --config Release -j $(nproc)
234+
235+
debian-13-loongarch64-cpu-cross:
236+
runs-on: ubuntu-24.04
237+
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
238+
239+
steps:
240+
- uses: actions/checkout@v4
241+
- name: Setup LoongArch
242+
run: |
243+
rm -f /etc/apt/sources.list.d/*
244+
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
245+
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
246+
EOF
247+
( echo 'quiet "true";'; \
248+
echo 'APT::Get::Assume-Yes "true";'; \
249+
echo 'APT::Install-Recommends "false";'; \
250+
echo 'Acquire::Check-Valid-Until "false";'; \
251+
echo 'Acquire::Retries "5";'; \
252+
) > /etc/apt/apt.conf.d/99snapshot-repos
253+
254+
apt-get update
255+
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
256+
dpkg --add-architecture loong64
257+
258+
# Add arch-specific repositories for non-amd64 architectures
259+
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
260+
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
261+
EOF
262+
263+
apt-get update || true ;# Prevent failure due to missing URLs.
264+
265+
apt-get install -y --no-install-recommends \
266+
build-essential \
267+
gcc-14-loongarch64-linux-gnu \
268+
g++-14-loongarch64-linux-gnu
269+
270+
- name: Build
271+
run: |
272+
cmake -B build -DLLAMA_CURL=OFF \
273+
-DCMAKE_BUILD_TYPE=Release \
274+
-DGGML_OPENMP=OFF \
275+
-DLLAMA_BUILD_EXAMPLES=ON \
276+
-DLLAMA_BUILD_TOOLS=ON \
277+
-DLLAMA_BUILD_TESTS=OFF \
278+
-DCMAKE_SYSTEM_NAME=Linux \
279+
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
280+
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
281+
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
282+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
283+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
284+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
285+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
286+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
287+
288+
cmake --build build --config Release -j $(nproc)
289+
290+
debian-13-loongarch64-vulkan-cross:
291+
runs-on: ubuntu-24.04
292+
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
293+
294+
steps:
295+
- uses: actions/checkout@v4
296+
- name: Setup LoongArch
297+
run: |
298+
rm -f /etc/apt/sources.list.d/*
299+
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
300+
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
301+
EOF
302+
( echo 'quiet "true";'; \
303+
echo 'APT::Get::Assume-Yes "true";'; \
304+
echo 'APT::Install-Recommends "false";'; \
305+
echo 'Acquire::Check-Valid-Until "false";'; \
306+
echo 'Acquire::Retries "5";'; \
307+
) > /etc/apt/apt.conf.d/99snapshot-repos
308+
309+
apt-get update
310+
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
311+
dpkg --add-architecture loong64
312+
313+
# Add arch-specific repositories for non-amd64 architectures
314+
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
315+
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
316+
EOF
317+
318+
apt-get update || true ;# Prevent failure due to missing URLs.
319+
320+
apt-get install -y --no-install-recommends \
321+
build-essential \
322+
glslc \
323+
gcc-14-loongarch64-linux-gnu \
324+
g++-14-loongarch64-linux-gnu \
325+
libvulkan-dev:loong64
326+
327+
- name: Build
328+
run: |
329+
cmake -B build -DLLAMA_CURL=OFF \
330+
-DCMAKE_BUILD_TYPE=Release \
331+
-DGGML_VULKAN=ON \
332+
-DGGML_OPENMP=OFF \
333+
-DLLAMA_BUILD_EXAMPLES=ON \
334+
-DLLAMA_BUILD_TOOLS=ON \
335+
-DLLAMA_BUILD_TESTS=OFF \
336+
-DCMAKE_SYSTEM_NAME=Linux \
337+
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
338+
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
339+
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
340+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
341+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
342+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
343+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
344+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
345+
346+
cmake --build build --config Release -j $(nproc)

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -689,8 +689,8 @@ jobs:
689689
strategy:
690690
matrix:
691691
include:
692-
- build: 'cpu-x64'
693-
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
692+
- build: 'cpu-x64 (static)'
693+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
694694
- build: 'openblas-x64'
695695
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
696696
- build: 'vulkan-x64'

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ ifdef LLAMA_SERVER_SSL
367367
endif
368368

369369
ifndef GGML_NO_CPU_AARCH64
370-
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
370+
MK_CPPFLAGS += -DGGML_USE_CPU_REPACK
371371
endif
372372

373373
# warnings
@@ -970,7 +970,7 @@ OBJ_GGML = \
970970
$(DIR_GGML)/src/ggml-threading.o \
971971
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
972972
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
973-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
973+
$(DIR_GGML)/src/ggml-cpu/repack.o \
974974
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
975975
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
976976
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \

docs/backend/CANN.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- [DataType Supports](#datatype-supports)
99
- [Docker](#docker)
1010
- [Linux](#linux)
11+
- [Environment variable setup](#environment-variable-setup)
1112
- [TODO](#todo)
1213

1314

@@ -290,5 +291,24 @@ Authors from Peking University: Bizhao Shi ([email protected]), Yuxin Yang (yxyang
290291

291292
We would like to thank Tuo Dai, Shanni Li, and all of the project maintainers from Huawei Technologies Co., Ltd for their help during the code development and pull request.
292293

294+
## Environment variable setup
295+
296+
### GGML_CANN_ASYNC_MODE
297+
298+
Enables asynchronous operator submission. Disabled by default.
299+
300+
### GGML_CANN_MEM_POOL
301+
302+
Specifies the memory pool management strategy:
303+
304+
- vmm: Utilizes a virtual memory manager pool. If hardware support for VMM is unavailable, falls back to the legacy (leg) memory pool.
305+
306+
- prio: Employs priority-queue-based memory pool management.
307+
- leg: Uses a fixed-size buffer pool.
308+
309+
### GGML_CANN_DISABLE_BUF_POOL_CLEAN
310+
311+
When enabled, disables automatic cleanup of the memory pool. This option is only effective when using the prio or leg memory pool strategies.
312+
293313
## TODO
294314
- Support more models and data types.

ggml/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
105105
message(DEBUG "INS_ENB : ${INS_ENB}")
106106

107107
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
108-
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
108+
option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
109109
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
110110
option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
111111
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})

ggml/src/ggml-cann/common.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <thread>
3838
#include <unistd.h>
3939
#include <functional>
40+
#include <optional>
4041

4142
#include "../include/ggml-cann.h"
4243
#include "../include/ggml.h"
@@ -103,6 +104,9 @@ const ggml_cann_device_info& ggml_cann_info();
103104
void ggml_cann_set_device(int32_t device);
104105
int32_t ggml_cann_get_device();
105106

107+
std::optional<std::string> get_env(const std::string& name);
108+
bool parse_bool(const std::string& value);
109+
106110
/**
107111
* @brief Abstract base class for memory pools used by CANN.
108112
*/
@@ -354,7 +358,8 @@ struct ggml_backend_cann_context {
354358
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
355359
ggml_cann_set_device(device);
356360
description = aclrtGetSocName();
357-
async_mode = (getenv("GGML_CANN_ASYNC_MODE") != nullptr);
361+
362+
bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
358363
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
359364
device, async_mode ? "ON" : "OFF");
360365
}

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include <mutex>
3232
#include <queue>
3333
#include <chrono>
34+
#include <unordered_set>
35+
#include <optional>
3436

3537
#include "ggml-impl.h"
3638
#include "ggml-backend-impl.h"
@@ -93,6 +95,26 @@ int32_t ggml_cann_get_device() {
9395
return id;
9496
}
9597

98+
/**
 * @brief Read an environment variable and normalize its value to lower case.
 *
 * @param name Name of the environment variable to look up.
 * @return std::nullopt when the variable is not set; otherwise its value
 *         (which may be an empty string) with every character lower-cased.
 */
std::optional<std::string> get_env(const std::string& name) {
    const char* val = std::getenv(name.c_str());
    if (!val) return std::nullopt;
    std::string res(val);
    // Go through unsigned char: calling tolower with a negative char value
    // (e.g. a non-ASCII byte on platforms where char is signed) is undefined.
    for (char& c : res) {
        c = static_cast<char>(::tolower(static_cast<unsigned char>(c)));
    }
    return res;
}
109+
110+
/**
 * @brief Interpret a string as a boolean switch.
 *
 * The comparison is exact and case-sensitive.
 *
 * @param value Text to interpret.
 * @return true only when value is one of "on", "1", "yes", "y", "enable" or
 *         "true"; false for anything else, including the empty string.
 */
bool parse_bool(const std::string& value) {
    // Built once on first use rather than re-allocated on every call.
    static const std::unordered_set<std::string> valid_values = {"on", "1", "yes", "y", "enable", "true"};
    return valid_values.find(value) != valid_values.end();
}
117+
96118
/**
97119
* @brief Initialize the CANN device information.
98120
*
@@ -214,7 +236,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
214236
* @param device The device ID to associate with this buffer pool.
215237
*/
216238
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
217-
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
239+
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
218240
}
219241

220242
/**
@@ -410,7 +432,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
410432
* @param device The device ID to associate with this buffer pool.
411433
*/
412434
explicit ggml_cann_pool_buf(int device) : device(device) {
413-
disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr;
435+
disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
414436
}
415437

416438
/**
@@ -731,16 +753,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
731753
*/
732754
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(
733755
int device) {
734-
bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr);
735-
if (!disable_vmm && ggml_cann_info().devices[device].vmm) {
736-
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
737-
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
738-
}
739-
bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr);
740-
if (enable_buf_prio) {
756+
std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
757+
758+
if (mem_pool_type == "prio") {
741759
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
742760
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf_prio(device));
743761
}
762+
763+
if (ggml_cann_info().devices[device].vmm && mem_pool_type != "leg") {
764+
GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device);
765+
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_vmm(device));
766+
}
767+
744768
GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device);
745769
return std::unique_ptr<ggml_cann_pool>(new ggml_cann_pool_buf(device));
746770
}

ggml/src/ggml-common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
10741074
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
10751075
GGML_TABLE_END()
10761076

1077+
GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
1078+
-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
1079+
GGML_TABLE_END()
1080+
10771081
#define NGRID_IQ1S 2048
10781082
#define IQ1S_DELTA 0.125f
10791083
#define IQ1M_DELTA 0.125f

0 commit comments

Comments
 (0)