Skip to content

Commit bd36d9f

Browse files
Merge pull request #28 from menloresearch/update-dev-from-master-2025-03-25-00-08
Sync master with upstream release b4951
2 parents 8abcfb5 + 2b65ae3 commit bd36d9f

File tree

15 files changed

+142
-162
lines changed

15 files changed

+142
-162
lines changed

ci/run.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -826,16 +826,20 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
826826
fi
827827

828828
ret=0
829-
830-
test $ret -eq 0 && gg_run ctest_debug
829+
if [ -z ${GG_BUILD_SYCL} ]; then
830+
# SYCL build breaks with debug build flags
831+
test $ret -eq 0 && gg_run ctest_debug
832+
fi
831833
test $ret -eq 0 && gg_run ctest_release
832834

833835
if [ -z ${GG_BUILD_LOW_PERF} ]; then
834836
test $ret -eq 0 && gg_run embd_bge_small
835837
test $ret -eq 0 && gg_run rerank_tiny
836838

837839
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
838-
test $ret -eq 0 && gg_run test_scripts_debug
840+
if [ -z ${GG_BUILD_SYCL} ]; then
841+
test $ret -eq 0 && gg_run test_scripts_debug
842+
fi
839843
test $ret -eq 0 && gg_run test_scripts_release
840844
fi
841845

@@ -846,7 +850,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
846850
test $ret -eq 0 && gg_run pythia_2_8b
847851
#test $ret -eq 0 && gg_run open_llama_7b_v2
848852
fi
849-
test $ret -eq 0 && gg_run ctest_with_model_debug
853+
if [ -z ${GG_BUILD_SYCL} ]; then
854+
test $ret -eq 0 && gg_run ctest_with_model_debug
855+
fi
850856
test $ret -eq 0 && gg_run ctest_with_model_release
851857
fi
852858
fi

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
705705
if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
706706
# ref: https://huggingface.co/Xenova/gpt-4o
707707
res = "gpt-4o"
708+
if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
709+
# ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
710+
res = "superbpe"
708711

709712
if res is None:
710713
logger.warning("\n")

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ class TOKENIZER_TYPE(IntEnum):
110110
{"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
111111
{"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
112112
{"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
113+
{"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
113114
]
114115

115116

docs/cuda-fedora.md renamed to docs/backend/CUDA-FEDORA.md

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@ In this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox
1414
- [Creating a Fedora Toolbox Environment](#creating-a-fedora-toolbox-environment)
1515
- [Installing Essential Development Tools](#installing-essential-development-tools)
1616
- [Adding the CUDA Repository](#adding-the-cuda-repository)
17-
- [Installing `nvidia-driver-libs`](#installing-nvidia-driver-libs)
18-
- [Manually Resolving Package Conflicts](#manually-resolving-package-conflicts)
19-
- [Finalizing the Installation of `nvidia-driver-libs`](#finalizing-the-installation-of-nvidia-driver-libs)
17+
- [Installing Nvidia Driver Libraries](#installing-nvidia-driver-libraries)
2018
- [Installing the CUDA Meta-Package](#installing-the-cuda-meta-package)
2119
- [Configuring the Environment](#configuring-the-environment)
2220
- [Verifying the Installation](#verifying-the-installation)
@@ -67,7 +65,7 @@ This guide focuses on Fedora hosts, but with small adjustments, it can work for
6765
sudo dnf distro-sync
6866
```
6967

70-
2. **Install the Default Text Editor (Optional):**
68+
2. **Install Vim, the default text editor (Optional):**
7169

7270
```bash
7371
sudo dnf install vim-default-editor --allowerasing
@@ -97,60 +95,75 @@ After adding the repository, synchronize the package manager again:
9795
sudo dnf distro-sync
9896
```
9997

100-
## Installing `nvidia-driver-libs` and `nvidia-driver-cuda-libs`
98+
## Installing Nvidia Driver Libraries
10199

102-
We need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go).
100+
First, we need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go):
103101

104102
```bash
105103
ls -la /usr/lib64/libcuda.so.1
106104
```
107105

108-
**Explanation:**
106+
### If *`libcuda.so.1`* is missing:
107+
108+
```
109+
ls: cannot access '/usr/lib64/libcuda.so.1': No such file or directory
110+
```
109111

110-
- `nvidia-driver-libs` and `nvidia-driver-cuda-libs` contains necessary NVIDIA driver libraries required by CUDA,
111-
on hosts with NVIDIA drivers installed the Fedora Container will supply the host libraries.
112+
**Explanation:**
113+
The host does not supply the CUDA drivers, **install them now:**
112114

113-
### Install Nvidia Driver Libraries on Guest (if `libcuda.so.1` was NOT found).
115+
#### Install the Nvidia Driver Libraries on Guest:
114116

115117
```bash
116-
sudo dnf install nvidia-driver-libs nvidia-driver-cuda-libs
118+
sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
117119
```
118120

119-
### Manually Updating the RPM database for host-supplied NVIDIA drivers (if `libcuda.so.1` was found).
121+
### If *`libcuda.so.1`* exists:
122+
```
123+
lrwxrwxrwx. 1 root root 21 Mar 24 11:26 /usr/lib64/libcuda.so.1 -> libcuda.so.570.133.07
124+
```
125+
126+
**Explanation:**
127+
The host is supplying the CUDA drivers, **we need to update the guest RPM Database accordingly:**
120128

121-
If the installation fails due to conflicts, we'll manually download and install the required packages, excluding conflicting files.
129+
#### Update the Toolbox RPM Database to include the Host-Supplied Libraries:
122130

123-
#### 1. Download `nvidia-driver-libs` and `nvidia-driver-cuda-libs` RPM's (with dependencies)
131+
Note: we do not actually install the libraries, we just update the DB so that the guest system knows they are supplied by the host.
132+
133+
##### 1. Download `nvidia-` parts that are supplied by the host RPM's (with dependencies)
124134

125135
```bash
126-
sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-libs nvidia-driver-cuda-libs
136+
sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
127137
```
128138

129-
#### 2. Update the RPM database to assume the installation of these packages.
139+
##### 2. Update the RPM database to assume the installation of these packages.
130140

131141
```bash
132142
sudo rpm --install --verbose --hash --justdb /tmp/nvidia-driver-libs/*
133143
```
134144

135145
**Note:**
136146

137-
- The `--justdb` option only updates the RPM database, without touching the filesystem.
147+
- The `--justdb` option only updates the RPM database, without touching the filesystem elsewhere.
148+
149+
##### Check that the RPM Database has been correctly updated:
138150

139-
#### Finalizing the Installation of `nvidia-driver-libs` and `nvidia-driver-cuda-libs`
151+
**Note:** This is the same command as in the *"Install the Nvidia Driver Libraries on Guest"* for if *`libcuda.so.1`* was missing.
140152

141-
After manually installing the dependencies, run:
142153

143154
```bash
144-
sudo dnf install nvidia-driver-libs nvidia-driver-cuda-libs
155+
sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
145156
```
146157

147-
You should receive a message indicating the package is already installed:
158+
*(this time it will not install anything, as the database thinks that these packages are already installed)*
148159

149160
```
150161
Updating and loading repositories:
151162
Repositories loaded.
152-
Package "nvidia-driver-libs-3:570.86.10-1.fc41.x86_64" is already installed.
153-
Package "nvidia-driver-cuda-libs-3:570.86.10-1.fc41.x86_64" is already installed.
163+
Package "nvidia-driver-cuda-3:570.124.06-1.fc41.x86_64" is already installed.
164+
Package "nvidia-driver-libs-3:570.124.06-1.fc41.x86_64" is already installed.
165+
Package "nvidia-driver-cuda-libs-3:570.124.06-1.fc41.x86_64" is already installed.
166+
Package "nvidia-persistenced-3:570.124.06-1.fc41.x86_64" is already installed.
154167
155168
Nothing to do.
156169
```
@@ -207,9 +220,9 @@ You should see output similar to:
207220
```
208221
nvcc: NVIDIA (R) Cuda compiler driver
209222
Copyright (c) 2005-2025 NVIDIA Corporation
210-
Built on Wed_Jan_15_19:20:09_PST_2025
211-
Cuda compilation tools, release 12.8, V12.8.61
212-
Build cuda_12.8.r12.8/compiler.35404655_0
223+
Built on Fri_Feb_21_20:23:50_PST_2025
224+
Cuda compilation tools, release 12.8, V12.8.93
225+
Build cuda_12.8.r12.8/compiler.35583870_0
213226
```
214227

215228
This output confirms that the CUDA compiler is accessible and indicates the installed version.

docs/build.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,14 @@ You may find the official downloads here: [NVIDIA developer site](https://develo
132132
133133
134134
#### Compile and run inside a Fedora Toolbox Container
135-
We also have a [guide](./cuda-fedora.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).
135+
We also have a [guide](./backend/CUDA-FEDORA.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).
136136
137137
**Recommended for:**
138-
139-
- ***Particularly*** *convenient* for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/); such as: [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
140-
- Toolbox is installed by default: [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde).
138+
- ***Necessary*** for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/); such as: [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
139+
- (there are no supported CUDA packages for these systems)
140+
- ***Necessary*** for users that have a host that is not a: [Supported Nvidia CUDA Release Platform](https://developer.nvidia.com/cuda-downloads).
141+
- (for example, you may have [Fedora 42 Beta](https://fedoramagazine.org/announcing-fedora-linux-42-beta/) as your host operating system)
142+
- ***Convenient*** For those running [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde), and want to keep their host system clean.
141143
- *Optionally* toolbox packages are available: [Arch Linux](https://archlinux.org/), [Red Hat Enterprise Linux >= 8.5](https://www.redhat.com/en/technologies/linux-platforms/enterprise-linux), or [Ubuntu](https://ubuntu.com/download)
142144
143145

ggml/src/ggml-cuda/common.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,14 +243,14 @@ static bool fp16_mma_available(const int cc) {
243243
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN)
244244
return false;
245245
#else
246-
return GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA ||
246+
return (GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA) ||
247247
GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA3(cc);
248248
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN)
249249
}
250250

251251
// To be used for feature selection of external libraries, e.g. cuBLAS.
252252
static bool fp16_mma_hardware_available(const int cc) {
253-
return GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA ||
253+
return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA) ||
254254
GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA3(cc);
255255
}
256256

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1192,7 +1192,7 @@ static void ggml_cuda_op_mul_mat_cublas(
11921192

11931193
const bool use_fp16 = (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT;
11941194

1195-
if (((cc >= GGML_CUDA_CC_VOLTA && GGML_CUDA_CC_IS_NVIDIA(cc)) || GGML_CUDA_CC_IS_AMD(cc)) && use_fp16) {
1195+
if (((GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA) || GGML_CUDA_CC_IS_AMD(cc)) && use_fp16) {
11961196
// convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32
11971197
ggml_cuda_pool_alloc<half> src0_as_f16(ctx.pool(id));
11981198
if (src0->type != GGML_TYPE_F16) {

ggml/src/ggml-cuda/mmq.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ void ggml_cuda_op_mul_mat_q(
2727
// The stream-k decomposition is only faster for recent NVIDIA GPUs.
2828
// Also its fixup needs to allocate a temporary buffer in the memory pool.
2929
// There are multiple parallel CUDA streams for src1_ncols != ne11 which would introduce a race condition for this buffer.
30-
const bool use_stream_k = ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA &&
31-
GGML_CUDA_CC_IS_NVIDIA(cc) && src1_ncols == ne11;
30+
const bool use_stream_k = GGML_CUDA_CC_IS_NVIDIA(cc) &&
31+
ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA && src1_ncols == ne11;
3232
const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst, use_stream_k};
3333

3434
switch (src0->type) {

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ struct tile_x_sizes {
9090

9191
static int get_mmq_x_max_host(const int cc) {
9292
return new_mma_available(cc) ? 128 :
93-
ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA && GGML_CUDA_CC_IS_NVIDIA(cc) ?
93+
GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA ?
9494
#ifdef GGML_CUDA_FORCE_MMQ
9595
128 : 64;
9696
#else
@@ -124,7 +124,7 @@ static constexpr __device__ int get_mmq_x_max_device() {
124124

125125
static int get_mmq_y_host(const int cc) {
126126
return GGML_CUDA_CC_IS_AMD(cc) ? (GGML_CUDA_CC_IS_RDNA1(cc) ? 64 : 128) :
127-
((ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA && GGML_CUDA_CC_IS_NVIDIA(cc)) ? 128 : 64);
127+
((GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA) ? 128 : 64);
128128
}
129129

130130
static constexpr __device__ int get_mmq_y_device() {
@@ -2832,7 +2832,7 @@ void mul_mat_q_case(ggml_backend_cuda_context & ctx, const mmq_args & args, cuda
28322832
const int mmq_x_max = get_mmq_x_max_host(cc);
28332833
const int mmq_y = get_mmq_y_host(cc);
28342834
const int block_num_y = (args.ne01 + mmq_y - 1) / mmq_y;
2835-
const bool use_stream_k = ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA && GGML_CUDA_CC_IS_NVIDIA(cc);
2835+
const bool use_stream_k = GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA;
28362836

28372837
int mmq_x_best = 0;
28382838
int nparts_best = INT_MAX;

0 commit comments

Comments
 (0)