
Commit 82889bb

Merge branch 'ggml-org:master' into master
2 parents 1ec1c1e + 48d7021 commit 82889bb

25 files changed: +439 −151 lines

ci/run.sh

Lines changed: 10 additions & 4 deletions
```diff
@@ -826,16 +826,20 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
 fi

 ret=0

-test $ret -eq 0 && gg_run ctest_debug
+if [ -z ${GG_BUILD_SYCL} ]; then
+    # SYCL build breaks with debug build flags
+    test $ret -eq 0 && gg_run ctest_debug
+fi
 test $ret -eq 0 && gg_run ctest_release

 if [ -z ${GG_BUILD_LOW_PERF} ]; then
     test $ret -eq 0 && gg_run embd_bge_small
     test $ret -eq 0 && gg_run rerank_tiny

     if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
-        test $ret -eq 0 && gg_run test_scripts_debug
+        if [ -z ${GG_BUILD_SYCL} ]; then
+            test $ret -eq 0 && gg_run test_scripts_debug
+        fi
         test $ret -eq 0 && gg_run test_scripts_release
     fi

@@ -846,7 +850,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
         test $ret -eq 0 && gg_run pythia_2_8b
         #test $ret -eq 0 && gg_run open_llama_7b_v2
     fi
-    test $ret -eq 0 && gg_run ctest_with_model_debug
+    if [ -z ${GG_BUILD_SYCL} ]; then
+        test $ret -eq 0 && gg_run ctest_with_model_debug
+    fi
     test $ret -eq 0 && gg_run ctest_with_model_release
 fi
 fi
```
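In effect, a SYCL CI run now skips all three debug variants. A hedged sketch of the two invocations, assuming the `bash ./ci/run.sh <results-dir> <mnt-dir>` argument layout documented in `ci/README.md`:

```bash
# hypothetical invocations; argument layout assumed from ci/README.md
GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt  # skips ctest_debug, test_scripts_debug,
                                                          # and ctest_with_model_debug
bash ./ci/run.sh ./tmp/results ./tmp/mnt                  # default run: debug targets still execute
```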

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -705,6 +705,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
             # ref: https://huggingface.co/Xenova/gpt-4o
             res = "gpt-4o"
+        if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
+            # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
+            res = "superbpe"

         if res is None:
             logger.warning("\n")
```
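For context, `chkhsh` is the hash that `convert_hf_to_gguf_update.py` computes over a tokenization of a fixed test string. A simplified sketch of that mechanism (the real `chktxt` is a long multi-script string defined in the update script; the short placeholder here would not reproduce the hash):

```python
# simplified sketch of how chkhsh is derived (see convert_hf_to_gguf_update.py)
from hashlib import sha256
from transformers import AutoTokenizer

chktxt = "..."  # placeholder for the script's long multilingual test string
tokenizer = AutoTokenizer.from_pretrained("UW/OLMo2-8B-SuperBPE-t180k")
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)  # with the real chktxt, matches "7dec8608..." and selects res = "superbpe"
```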

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -110,6 +110,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
     {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
+    {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
 ]
```
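With the repo added to this list, the hash table in `convert_hf_to_gguf.py` is regenerated by running the update script; the invocation below is assumed from the script's usage (it needs a Hugging Face token to download the tokenizers):

```sh
# assumed usage: downloads each listed tokenizer and regenerates the chkhsh checks
python3 convert_hf_to_gguf_update.py <huggingface_read_token>
```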

docs/cuda-fedora.md renamed to docs/backend/CUDA-FEDORA.md

Lines changed: 39 additions & 26 deletions
````diff
@@ -14,9 +14,7 @@ In this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox
 - [Creating a Fedora Toolbox Environment](#creating-a-fedora-toolbox-environment)
 - [Installing Essential Development Tools](#installing-essential-development-tools)
 - [Adding the CUDA Repository](#adding-the-cuda-repository)
-- [Installing `nvidia-driver-libs`](#installing-nvidia-driver-libs)
-  - [Manually Resolving Package Conflicts](#manually-resolving-package-conflicts)
-  - [Finalizing the Installation of `nvidia-driver-libs`](#finalizing-the-installation-of-nvidia-driver-libs)
+- [Installing Nvidia Driver Libraries](#installing-nvidia-driver-libraries)
 - [Installing the CUDA Meta-Package](#installing-the-cuda-meta-package)
 - [Configuring the Environment](#configuring-the-environment)
 - [Verifying the Installation](#verifying-the-installation)
@@ -67,7 +65,7 @@ This guide focuses on Fedora hosts, but with small adjustments, it can work for
    sudo dnf distro-sync
    ```

-2. **Install the Default Text Editor (Optional):**
+2. **Install Vim, the default text editor (Optional):**

    ```bash
    sudo dnf install vim-default-editor --allowerasing
@@ -97,60 +95,75 @@ After adding the repository, synchronize the package manager again:
 sudo dnf distro-sync
 ```

-## Installing `nvidia-driver-libs` and `nvidia-driver-cuda-libs`
+## Installing Nvidia Driver Libraries

-We need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go).
+First, we need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go):

 ```bash
 ls -la /usr/lib64/libcuda.so.1
 ```

-**Explanation:**
+### If *`libcuda.so.1`* is missing:
+
+```
+ls: cannot access '/usr/lib64/libcuda.so.1': No such file or directory
+```

-- `nvidia-driver-libs` and `nvidia-driver-cuda-libs` contains necessary NVIDIA driver libraries required by CUDA,
-  on hosts with NVIDIA drivers installed the Fedora Container will supply the host libraries.
+**Explanation:**
+The host does not supply the CUDA drivers; **install them now:**

-### Install Nvidia Driver Libraries on Guest (if `libcuda.so.1` was NOT found).
+#### Install the Nvidia Driver Libraries on Guest:

 ```bash
-sudo dnf install nvidia-driver-libs nvidia-driver-cuda-libs
+sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
 ```

-### Manually Updating the RPM database for host-supplied NVIDIA drivers (if `libcuda.so.1` was found).
+### If *`libcuda.so.1`* exists:
+
+```
+lrwxrwxrwx. 1 root root 21 Mar 24 11:26 /usr/lib64/libcuda.so.1 -> libcuda.so.570.133.07
+```
+
+**Explanation:**
+The host is supplying the CUDA drivers; **we need to update the guest RPM database accordingly:**

-If the installation fails due to conflicts, we'll manually download and install the required packages, excluding conflicting files.
+#### Update the Toolbox RPM Database to include the Host-Supplied Libraries:

-#### 1. Download `nvidia-driver-libs` and `nvidia-driver-cuda-libs` RPM's (with dependencies)
+Note: we do not actually install the libraries; we just update the DB so that the guest system knows they are supplied by the host.
+
+##### 1. Download the host-supplied `nvidia-` RPMs (with dependencies)

 ```bash
-sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-libs nvidia-driver-cuda-libs
+sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
 ```

-#### 2. Update the RPM database to assume the installation of these packages.
+##### 2. Update the RPM database to assume the installation of these packages.

 ```bash
 sudo rpm --install --verbose --hash --justdb /tmp/nvidia-driver-libs/*
 ```

 **Note:**

-- The `--justdb` option only updates the RPM database, without touching the filesystem.
+- The `--justdb` option only updates the RPM database, without touching the filesystem elsewhere.
+
+##### Check that the RPM Database has been correctly updated:

-#### Finalizing the Installation of `nvidia-driver-libs` and `nvidia-driver-cuda-libs`
+**Note:** This is the same command as in *"Install the Nvidia Driver Libraries on Guest"* above, for when *`libcuda.so.1`* was missing.

-After manually installing the dependencies, run:

 ```bash
-sudo dnf install nvidia-driver-libs nvidia-driver-cuda-libs
+sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
 ```

-You should receive a message indicating the package is already installed:
+*(this time it will not install anything, as the database thinks that these packages are already installed)*

 ```
 Updating and loading repositories:
 Repositories loaded.
-Package "nvidia-driver-libs-3:570.86.10-1.fc41.x86_64" is already installed.
-Package "nvidia-driver-cuda-libs-3:570.86.10-1.fc41.x86_64" is already installed.
+Package "nvidia-driver-cuda-3:570.124.06-1.fc41.x86_64" is already installed.
+Package "nvidia-driver-libs-3:570.124.06-1.fc41.x86_64" is already installed.
+Package "nvidia-driver-cuda-libs-3:570.124.06-1.fc41.x86_64" is already installed.
+Package "nvidia-persistenced-3:570.124.06-1.fc41.x86_64" is already installed.

 Nothing to do.
 ```
@@ -207,9 +220,9 @@ You should see output similar to:
 ```
 nvcc: NVIDIA (R) Cuda compiler driver
 Copyright (c) 2005-2025 NVIDIA Corporation
-Built on Wed_Jan_15_19:20:09_PST_2025
-Cuda compilation tools, release 12.8, V12.8.61
-Build cuda_12.8.r12.8/compiler.35404655_0
+Built on Fri_Feb_21_20:23:50_PST_2025
+Cuda compilation tools, release 12.8, V12.8.93
+Build cuda_12.8.r12.8/compiler.35583870_0
 ```

 This output confirms that the CUDA compiler is accessible and indicates the installed version.
````
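A quick way to confirm the `--justdb` trick took effect is to query the RPM database directly; these are standard `rpm` query flags, shown here as a hedged example rather than as part of the guide:

```bash
rpm -q nvidia-driver-libs nvidia-driver-cuda-libs  # should report the host-matching versions
rpm -ql nvidia-driver-libs | head                  # paths are in the DB, though rpm never wrote them
```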

docs/build.md

Lines changed: 6 additions & 4 deletions
```diff
@@ -132,12 +132,14 @@ You may find the official downloads here: [NVIDIA developer site](https://develo


 #### Compile and run inside a Fedora Toolbox Container
-We also have a [guide](./cuda-fedora.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).
+We also have a [guide](./backend/CUDA-FEDORA.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).

 **Recommended for:**
-
-- ***Particularly*** *convenient* for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/); such as: [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
-- Toolbox is installed by default: [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde).
+- ***Necessary*** for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/), such as [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
+  - (there are no supported CUDA packages for these systems)
+- ***Necessary*** for users whose host is not a [supported Nvidia CUDA release platform](https://developer.nvidia.com/cuda-downloads).
+  - (for example, you may have [Fedora 42 Beta](https://fedoramagazine.org/announcing-fedora-linux-42-beta/) as your host operating system)
+- ***Convenient*** for those running [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde) who want to keep their host system clean.
 - *Optionally* toolbox packages are available: [Arch Linux](https://archlinux.org/), [Red Hat Enterprise Linux >= 8.5](https://www.redhat.com/en/technologies/linux-platforms/enterprise-linux), or [Ubuntu](https://ubuntu.com/download)
```
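As a sketch of the workflow the guide describes (the `toolbox` and `cmake` commands are real; the container name and exact sequence are illustrative):

```bash
toolbox create --distro fedora --release f41 llama-cuda  # create a Fedora toolbox
toolbox enter llama-cuda                                 # enter it
# ...install the CUDA toolkit per docs/backend/CUDA-FEDORA.md, then:
cmake -B build -DGGML_CUDA=ON
cmake --build build --config Release
```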

docs/install.md

Lines changed: 7 additions & 0 deletions
````diff
@@ -9,6 +9,13 @@ brew install llama.cpp
 ```
 The formula is automatically updated with new `llama.cpp` releases. More info: https://github.com/ggml-org/llama.cpp/discussions/7668

+## MacPorts
+
+```sh
+sudo port install llama.cpp
+```
+
+See also: https://ports.macports.org/port/llama.cpp/details/
+
 ## Nix

 On Mac and Linux, the Nix package manager can be used via
````

examples/server/server.cpp

Lines changed: 5 additions & 0 deletions
```diff
@@ -830,6 +830,11 @@ struct server_task_result_cmpl_final : server_task_result {
             ret.push_back({"timings", timings.to_json()});
         }

+        // extra fields for debugging purposes
+        if (verbose) {
+            ret["__verbose"] = to_json_non_oaicompat();
+        }
+
         return ret;
     }
 };
```
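The effect on the final (non-streaming) OAI-compatible completion response is one extra top-level key. An abridged, hypothetical shape, inferred from this hunk (the surrounding fields are not shown in the diff):

```json
{
  "object": "chat.completion",
  "choices": ["<abridged>"],
  "timings": "<abridged>",
  "__verbose": "<full non-OAI-compatible result from to_json_non_oaicompat()>"
}
```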

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 31 additions & 27 deletions
```diff
@@ -3110,17 +3110,17 @@ static void ggml_compute_forward_dup_same_cont(
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads

-    // parallelize by elements
-    const int ne = ggml_nelements(dst);
-    const int dr = (ne + nth - 1) / nth;
-    const int ie0 = dr * ith;
-    const int ie1 = MIN(ie0 + dr, ne);
+    // parallelize by blocks
+    const int nk = ggml_nelements(src0)/ggml_blck_size(src0->type);
+    const int dr = (nk + nth - 1) / nth;
+    const int k0 = dr * ith;
+    const int k1 = MIN(k0 + dr, nk);

-    if (ie0 < ie1) {
+    if (k0 < k1) {
         memcpy(
-            ((char *) dst->data + ie0*nb0),
-            ((char *) src0->data + ie0*nb0),
-            (ie1 - ie0) * nb0);
+            ((char *) dst->data + k0*nb0),
+            ((char *) src0->data + k0*nb0),
+            (k1 - k0) * nb0);
     }
 }

@@ -4055,7 +4055,6 @@ static void ggml_compute_forward_dup_f32(
 static void ggml_compute_forward_dup_bytes(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
-
     const struct ggml_tensor * src0 = dst->src[0];

     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
@@ -4069,10 +4068,10 @@ static void ggml_compute_forward_dup_bytes(
     }

     const size_t type_size = ggml_type_size(src0->type);
+
     const int ith = params->ith; // thread index
     const int nth = params->nth; // number of threads

-
     // parallelize by rows
     const int nr = ne01;
     // number of rows per thread
@@ -4082,10 +4081,10 @@ static void ggml_compute_forward_dup_bytes(
     const int ir1 = MIN(ir0 + dr, nr);

     if (src0->type == dst->type &&
-        ne00 == ne0 &&
+        ggml_are_same_shape(src0, dst) &&
         nb00 == type_size && nb0 == type_size) {
         // copy by rows
-        const size_t rs = ne00 * type_size;
+        const size_t rs = ggml_row_size(src0->type, ne00);
         for (int64_t i03 = 0; i03 < ne03; i03++) {
             for (int64_t i02 = 0; i02 < ne02; i02++) {
                 for (int64_t i01 = ir0; i01 < ir1; i01++) {
@@ -4140,17 +4139,20 @@ static void ggml_compute_forward_dup_bytes(
     }

     // dst counters
-
-    int64_t i10 = 0;
+    int64_t k10 = 0;
     int64_t i11 = 0;
     int64_t i12 = 0;
     int64_t i13 = 0;

+    // number of blocks in a row
+    const int64_t nk00 = ne00 / ggml_blck_size(src0->type);
+    const int64_t nk0 = ne0 / ggml_blck_size(dst->type);
+
     for (int64_t i03 = 0; i03 < ne03; i03++) {
         for (int64_t i02 = 0; i02 < ne02; i02++) {
-            i10 += ne00 * ir0;
-            while (i10 >= ne0) {
-                i10 -= ne0;
+            k10 += nk00 * ir0;
+            while (k10 >= nk0) {
+                k10 -= nk0;
                 if (++i11 == ne1) {
                     i11 = 0;
                     if (++i12 == ne2) {
@@ -4162,14 +4164,14 @@ static void ggml_compute_forward_dup_bytes(
                 }
             }
             for (int64_t i01 = ir0; i01 < ir1; i01++) {
-                for (int64_t i00 = 0; i00 < ne00; i00++) {
-                    const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-                    char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+                for (int64_t k00 = 0; k00 < nk00; k00++) {
+                    const char * src0_ptr = ((char *) src0->data + k00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                    char * dst_ptr = ((char *) dst->data + k10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);

                     memcpy(dst_ptr, src0_ptr, type_size);

-                    if (++i10 == ne0) {
-                        i10 = 0;
+                    if (++k10 == nk0) {
+                        k10 = 0;
                         if (++i11 == ne1) {
                             i11 = 0;
                             if (++i12 == ne2) {
@@ -4182,9 +4184,9 @@ static void ggml_compute_forward_dup_bytes(
                 }
             }
         }
-        i10 += ne00 * (ne01 - ir1);
-        while (i10 >= ne0) {
-            i10 -= ne0;
+        k10 += nk00 * (ne01 - ir1);
+        while (k10 >= nk0) {
+            k10 -= nk0;
             if (++i11 == ne1) {
                 i11 = 0;
                 if (++i12 == ne2) {
@@ -14308,7 +14310,9 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     }

     // extra_buffer op?
-    if (ggml_cpu_extra_compute_forward(params, tensor)) return;
+    if (ggml_cpu_extra_compute_forward(params, tensor)) {
+        return;
+    }

     switch (tensor->op) {
         case GGML_OP_DUP:
```
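The key idea behind the `dup` changes: quantized types pack many elements into one fixed-size block, so thread splits and byte offsets must land on block boundaries rather than element boundaries. A minimal sketch of the sizes involved, assuming the public `ggml.h` API from this repo:

```c
// Why dup now walks blocks, not elements: for quantized types, an
// element-wise split could hand a thread an offset inside a block;
// a block-wise split (k0..k1 as in the patch) never can.
#include "ggml.h"
#include <stdio.h>

int main(void) {
    const enum ggml_type t = GGML_TYPE_Q4_0;

    const long long blck  = (long long) ggml_blck_size(t); // elements per block (32 for Q4_0)
    const size_t    bytes = ggml_type_size(t);              // bytes per block   (18 for Q4_0)
    const long long ne00  = 4096;                           // elements in one row

    printf("elements per block: %lld\n", blck);
    printf("bytes per block:    %zu\n", bytes);
    printf("blocks per row:     %lld\n", ne00 / blck);
    printf("row size:           %zu bytes\n", ggml_row_size(t, ne00)); // == (ne00/blck)*bytes
    return 0;
}
```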
