Commit a63b7b5

Merge branch 'ggml-org:master' into master

2 parents: 1392cd5 + 152729f

File tree: 8 files changed, +78 −82 lines

.github/workflows/build.yml

Lines changed: 42 additions & 68 deletions
@@ -1302,8 +1302,8 @@ jobs:
       run: |
         GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-nvidia-v100-cuda:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, V100]
+  ggml-ci-x64-nvidia-cuda:
+    runs-on: [self-hosted, Linux, X64, NVIDIA]
 
     steps:
       - name: Clone
@@ -1316,8 +1316,8 @@ jobs:
         nvidia-smi
         GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-nvidia-v100-vulkan:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, V100]
+  ggml-ci-x64-nvidia-vulkan-cm:
+    runs-on: [self-hosted, Linux, X64, NVIDIA]
 
     steps:
       - name: Clone
@@ -1327,25 +1327,11 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
-  ggml-ci-x64-nvidia-t4-cuda:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          nvidia-smi
-          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-nvidia-t4-vulkan:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
+  ggml-ci-x64-nvidia-vulkan-cm2:
+    runs-on: [self-hosted, Linux, X64, NVIDIA, COOPMAT2]
 
     steps:
       - name: Clone
@@ -1355,23 +1341,9 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          vulkaninfo
+          vulkaninfo --summary
           GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-nvidia-t4-vulkan-coopmat1:
-    runs-on: [self-hosted, Linux, X64, NVIDIA, T4]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          vulkaninfo
-          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
-
   ggml-ci-x64-cpu-amx:
     runs-on: [self-hosted, Linux, X64, CPU, AMX]
 
@@ -1385,21 +1357,36 @@ jobs:
       run: |
         bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-amd-v710-vulkan:
-    runs-on: [self-hosted, Linux, X64, AMD, V710]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v4
-
-      - name: Test
-        id: ggml-ci
-        run: |
-          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  # ggml-ci-x64-amd-vulkan:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+  #
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+  #
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         vulkaninfo --summary
+  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+  #
+  # ggml-ci-x64-amd-rocm:
+  #   runs-on: [self-hosted, Linux, X64, AMD]
+  #
+  #   steps:
+  #     - name: Clone
+  #       id: checkout
+  #       uses: actions/checkout@v4
+  #
+  #     - name: Test
+  #       id: ggml-ci
+  #       run: |
+  #         amd-smi static
+  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
 
-  ggml-ci-x64-amd-v710-rocm:
-    runs-on: [self-hosted, Linux, X64, AMD, V710]
+  ggml-ci-mac-metal:
+    runs-on: [self-hosted, macOS, ARM64]
 
     steps:
       - name: Clone
@@ -1409,9 +1396,9 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
+          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
 
-  ggml-ci-mac-metal:
+  ggml-ci-mac-vulkan:
     runs-on: [self-hosted, macOS, ARM64]
 
     steps:
@@ -1422,18 +1409,5 @@ jobs:
       - name: Test
         id: ggml-ci
         run: |
-          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
-
-  # TODO: install vulkan drivers
-  # ggml-ci-mac-vulkan:
-  #   runs-on: [self-hosted, macOS, ARM64]
-  #
-  #   steps:
-  #     - name: Clone
-  #       id: checkout
-  #       uses: actions/checkout@v4
-  #
-  #     - name: Test
-  #       id: ggml-ci
-  #       run: |
-  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
+          vulkaninfo --summary
+          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

CODEOWNERS

Lines changed: 8 additions & 8 deletions
@@ -95,12 +95,12 @@
 /tools/tokenize/ @ggerganov
 /tools/tts/ @ggerganov
 /vendor/ @ggerganov
-.clang-format @slaren
-.clang-tidy @slaren
-AUTHORS @ggerganov
-CMakeLists.txt @ggerganov
-CONTRIBUTING.md @ggerganov
-LICENSE @ggerganov
-README.md @ggerganov
-SECURITY.md @ggerganov
+/.clang-format @slaren
+/.clang-tidy @slaren
+/AUTHORS @ggerganov
+/CMakeLists.txt @ggerganov
+/CONTRIBUTING.md @ggerganov
+/LICENSE @ggerganov
+/README.md @ggerganov
+/SECURITY.md @ggerganov
 requirements*.txt @CISC

ci/run.sh

Lines changed: 6 additions & 0 deletions
@@ -92,6 +92,12 @@ fi
 
 if [ ! -z ${GG_BUILD_VULKAN} ]; then
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
+
+    # if on Mac, disable METAL
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
+    fi
 fi
 
 if [ ! -z ${GG_BUILD_WEBGPU} ]; then

common/common.cpp

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@
 #include <climits>
 #include <cmath>
 #include <codecvt>
+#include <chrono>
 #include <cstdarg>
 #include <cstring>
 #include <ctime>
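
The new `<chrono>` include is a one-line change, presumably added because common.cpp names `std::chrono` entities and previously got the header only transitively. The underlying rule is worth a sketch: include `<chrono>` directly wherever `std::chrono` is used, since transitive includes vary across standard-library implementations. A minimal, self-contained C++ sketch of the pattern (the timing code here is illustrative, not taken from common.cpp):

```cpp
// Include <chrono> explicitly wherever std::chrono is named; relying on
// another header to pull it in transitively breaks on some toolchains.
#include <chrono>
#include <cstdio>

int main() {
    const auto t0 = std::chrono::steady_clock::now();
    // ... the work being timed would go here ...
    const auto t1 = std::chrono::steady_clock::now();
    const auto us =
        std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count();
    std::printf("elapsed: %lld us\n", static_cast<long long>(us));
    return 0;
}
```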

examples/model-conversion/README.md

Lines changed: 2 additions & 2 deletions
@@ -105,12 +105,12 @@ new model, the model can be converted to GGUF format using the following command
 ### Inspecting the converted model
 The converted model can be inspected using the following command:
 ```console
-(venv) $ make inspect-converted-model
+(venv) $ make causal-inspect-converted-model
 ```
 
 ### Running the converted model
 ```console
-(venv) $ make run-converted-model
+(venv) $ make causal-run-converted-model
 ```
 
 ### Model logits verfication

examples/model-conversion/scripts/causal/run-org-model.py

Lines changed: 1 addition & 1 deletion
@@ -193,7 +193,7 @@ def fn(_m, input, output):
 print(f"Tokenized: {tokenizer.convert_ids_to_tokens(input_ids[0])}")
 
 with torch.no_grad():
-    outputs = model(input_ids)
+    outputs = model(input_ids.to(model.device))
     logits = outputs.logits
 
 # Extract logits for the last token (next token prediction)

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 17 additions & 3 deletions
@@ -473,10 +473,10 @@ struct ggml_threadpool {
 struct ggml_compute_state {
 #ifndef GGML_USE_OPENMP
     ggml_thread_t thrd;
-    bool cpumask[GGML_MAX_N_THREADS];
     int last_graph;
     bool pending;
 #endif
+    bool cpumask[GGML_MAX_N_THREADS];
     struct ggml_threadpool * threadpool;
     int ith;
 };
@@ -3081,7 +3081,14 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
 
     threadpool->workers = workers;
 
-#ifndef GGML_USE_OPENMP
+#ifdef GGML_USE_OPENMP
+    int32_t cpumask_iter = 0;
+
+    // Compute CPU masks for each thread
+    for (int j = 0; j < tpp->n_threads; j++) {
+        ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
+    }
+#else // GGML_USE_OPENMP
     ggml_mutex_init(&threadpool->mutex);
     ggml_cond_init(&threadpool->cond);
 
@@ -3154,7 +3161,14 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
             atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
         }
 
-        ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
+        // Apply thread CPU mask and priority
+        int ith = omp_get_thread_num();
+
+        ggml_thread_apply_priority(threadpool->prio);
+        if (ggml_thread_cpumask_is_valid(threadpool->workers[ith].cpumask)) {
+            ggml_thread_apply_affinity(threadpool->workers[ith].cpumask);
+        }
+        ggml_graph_compute_thread(&threadpool->workers[ith]);
     }
 } else {
     atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
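
The change above moves the per-thread `cpumask` out of the `#ifndef GGML_USE_OPENMP` block so the OpenMP build also computes masks at threadpool creation and then applies affinity and priority from inside the parallel region. Applying the mask inside the region matters because OpenMP owns the worker threads, so each thread can only pin itself. A minimal standalone C++ sketch of that mechanism, assuming Linux and OpenMP; `pthread_setaffinity_np` here stands in for ggml's platform-specific `ggml_thread_apply_affinity` helper:

```cpp
// Each OpenMP worker pins itself to a core before doing compute work,
// mirroring the pattern the ggml-cpu.c change adds to the OpenMP path.
// Build with: g++ -fopenmp affinity.cpp (the affinity call is Linux-specific).
#include <omp.h>
#include <pthread.h>
#include <sched.h>
#include <cstdio>

// Pin the calling thread to a single core (illustrative stand-in for
// ggml_thread_apply_affinity, which handles full masks and other OSes).
static void apply_affinity(int core) {
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(core, &set);
    pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
}

int main() {
    #pragma omp parallel
    {
        const int ith = omp_get_thread_num();
        apply_affinity(ith);  // done once per worker, inside the region
        std::printf("thread %d pinned to core %d\n", ith, ith);
        // ... per-thread graph compute work would run here ...
    }
    return 0;
}
```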

ggml/src/ggml-quants.c

Lines changed: 1 addition & 0 deletions
@@ -3721,6 +3721,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT
     }
     float best = 0;
     float scale = max/(2*kMaxQ-1);
+    for (int k = 0; k < 8; ++k) is_on_grid[k] = true;
     for (int is = -15; is <= 15; ++is) {
         float id = (2*kMaxQ-1+is*0.2f)/max;
         float this_scale = 1/id;
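
This one-line quantization fix resets `is_on_grid` before the scale search; without it, the array keeps whatever the preceding block's search left in it, so stale entries can leak into the current block's result. A small C++ sketch of that bug class, with hypothetical names (not the ggml implementation):

```cpp
// A scratch array reused across outer iterations must be re-initialized at
// the top of each iteration, or state from the previous block leaks in.
#include <array>
#include <cstdio>

int main() {
    std::array<bool, 8> is_on_grid{};  // scratch shared by all blocks

    for (int block = 0; block < 3; ++block) {
        // The line the fix adds: without it, entries cleared while
        // processing the previous block would still read false here.
        is_on_grid.fill(true);

        if (block == 0) {
            is_on_grid[2] = false;  // this block's search rejects point 2
        }

        int on_grid = 0;
        for (bool b : is_on_grid) on_grid += b;
        std::printf("block %d: %d of 8 points on grid\n", block, on_grid);
    }
    return 0;
}
```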
