Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .devops/cuda.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ RUN apt-get update \
git \
python3 \
python3-pip \
&& pip install --upgrade pip setuptools wheel \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
Expand Down
1 change: 0 additions & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
/tools/server/ @ngxson
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
/ggml/src/ggml-opt.cpp @JohannesGaessler
/ggml/src/gguf.cpp @JohannesGaessler
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ LLM inference in C/C++

## Hot topics

- **[guide : running gpt-oss with llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/15396)**
- **[[FEEDBACK] Better packaging for llama.cpp to support downstream consumers 🤗](https://github.com/ggml-org/llama.cpp/discussions/15313)**
- Support for the `gpt-oss` model with native MXFP4 format has been added | [PR](https://github.com/ggml-org/llama.cpp/pull/15091) | [Collaboration with NVIDIA](https://blogs.nvidia.com/blog/rtx-ai-garage-openai-oss) | [Comment](https://github.com/ggml-org/llama.cpp/discussions/15095)
- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen)
Expand Down
10 changes: 5 additions & 5 deletions ggml/src/ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8
for (int i = 0; i < n; ++i) {
L[i] += nmax;
}
return sumlx / suml2;
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
}
for (int i = 0; i < n; ++i) {
int l = nearest_int(iscale * x[i]);
Expand Down Expand Up @@ -901,7 +901,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
for (int i = 0; i < n; ++i) {
max = MAX(max, x[i]);
}
if (!max) { // all zero
if (max < GROUP_MAX_EPS) { // all zero
for (int i = 0; i < n; ++i) { L[i] = 0; }
return 0.f;
}
Expand Down Expand Up @@ -966,7 +966,7 @@ static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint
break;
}
}
return sumlx/suml2;
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
}

static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
Expand Down Expand Up @@ -4266,7 +4266,7 @@ static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_R
sumw[j+1] = sumw[j] + weight[i];
}
}
float best_score = -FLT_MIN, scale = max;
float best_score = -FLT_MAX, scale = max;
int besti1 = -1, besti2 = -1, best_shift = 0;
for (int i1 = 0; i1 <= block_size; ++i1) {
for (int i2 = i1; i2 <= block_size; ++i2) {
Expand Down Expand Up @@ -4442,7 +4442,7 @@ static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_R
idx[2*j] = j;
}
qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
float best_score = -FLT_MIN, scale = max;
float best_score = -FLT_MAX, scale = max;
int besti1 = -1, besti2 = -1, best_k = -1;
// 0: +, +
// 1: +, -
Expand Down
3 changes: 2 additions & 1 deletion ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ void string_to_spv_func(const std::string& _name, const std::string& in_fname, c
std::string target_env = (name.find("_cm2") != std::string::npos) ? "--target-env=vulkan1.3" : "--target-env=vulkan1.2";

// disable spirv-opt for coopmat shaders for https://github.com/ggerganov/llama.cpp/issues/10734
std::string opt_level = coopmat ? "" : "-O";
// disable spirv-opt for bf16 shaders for https://github.com/ggml-org/llama.cpp/issues/15344
std::string opt_level = (coopmat || name.find("bf16") != std::string::npos) ? "" : "-O";

#ifdef _WIN32
std::vector<std::string> cmd = {GLSLC, "-fshader-stage=compute", target_env, opt_level, "\"" + in_path + "\"", "-o", "\"" + out_fname + "\""};
Expand Down
47 changes: 3 additions & 44 deletions scripts/sync-ggml-am.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,7 @@ while read c; do
cmake/common.cmake \
cmake/ggml-config.cmake.in \
src/ggml-cpu/cmake/FindSIMD.cmake \
src/ggml*.h \
src/ggml*.c \
src/ggml*.cpp \
src/gguf*.cpp \
src/ggml-blas/* \
src/ggml-cann/* \
src/ggml-cpu/* \
src/ggml-cuda/* \
src/ggml-hip/* \
src/ggml-metal/* \
src/ggml-musa/* \
src/ggml-opencl/* \
src/ggml-rpc/* \
src/ggml-sycl/* \
src/ggml-vulkan/* \
src/ggml* \
include/ggml*.h \
include/gguf*.h \
tests/test-opt.cpp \
Expand Down Expand Up @@ -131,21 +117,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
# cmake/ggml-config.cmake.in -> ggml/cmake/ggml-config.cmake.in
# src/ggml-cpu/cmake/FindSIMD.cmake -> ggml/src/ggml-cpu/cmake/FindSIMD.cmake
#
# src/ggml*.c -> ggml/src/ggml*.c
# src/ggml*.cpp -> ggml/src/ggml*.cpp
# src/ggml*.h -> ggml/src/ggml*.h
# src/gguf*.cpp -> ggml/src/gguf*.cpp
# src/ggml-blas/* -> ggml/src/ggml-blas/*
# src/ggml-cann/* -> ggml/src/ggml-cann/*
# src/ggml-cpu/* -> ggml/src/ggml-cpu/*
# src/ggml-cuda/* -> ggml/src/ggml-cuda/*
# src/ggml-hip/* -> ggml/src/ggml-hip/*
# src/ggml-metal/* -> ggml/src/ggml-metal/*
# src/ggml-musa/* -> ggml/src/ggml-musa/*
# src/ggml-opencl/* -> ggml/src/ggml-opencl/*
# src/ggml-rpc/* -> ggml/src/ggml-rpc/*
# src/ggml-sycl/* -> ggml/src/ggml-sycl/*
# src/ggml-vulkan/* -> ggml/src/ggml-vulkan/*
# src/ggml* -> ggml/src/ggml*
#
# include/ggml*.h -> ggml/include/ggml*.h
# include/gguf*.h -> ggml/include/gguf*.h
Expand All @@ -163,20 +135,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
-e 's/([[:space:]]| [ab]\/)cmake\/common.cmake/\1ggml\/cmake\/common.cmake/g' \
-e 's/([[:space:]]| [ab]\/)cmake\/ggml-config.cmake.in/\1ggml\/cmake\/ggml-config.cmake.in/g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\/cmake\/FindSIMD.cmake/\1ggml\/src\/ggml-cpu\/cmake\/FindSIMD.cmake/g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)\.c/\1ggml\/src\/ggml\2.c/g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)\.cpp/\1ggml\/src\/ggml\2.cpp/g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)\.h/\1ggml\/src\/ggml\2.h/g' \
-e 's/([[:space:]]| [ab]\/)src\/gguf(.*)\.cpp/\1ggml\/src\/gguf\2.cpp/g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-blas\//\1ggml\/src\/ggml-blas\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-cann\//\1ggml\/src\/ggml-cann\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-cpu\//\1ggml\/src\/ggml-cpu\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-hip\//\1ggml\/src\/ggml-hip\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-metal\//\1ggml\/src\/ggml-metal\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-opencl\//\1ggml\/src\/ggml-opencl\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-rpc\//\1ggml\/src\/ggml-rpc\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml-vulkan\//\1ggml\/src\/ggml-vulkan\//g' \
-e 's/([[:space:]]| [ab]\/)src\/ggml(.*)/\1ggml\/src\/ggml\2/g' \
-e 's/([[:space:]]| [ab]\/)include\/ggml(.*)\.h/\1ggml\/include\/ggml\2.h/g' \
-e 's/([[:space:]]| [ab]\/)include\/gguf(.*)\.h/\1ggml\/include\/gguf\2.h/g' \
-e 's/([[:space:]]| [ab]\/)tests\/(.*)\.cpp/\1tests\/\2.cpp/g' \
Expand Down
2 changes: 1 addition & 1 deletion scripts/sync-ggml.last
Original file line number Diff line number Diff line change
@@ -1 +1 @@
b141fc226b68e4af383101c39da90b54ede98850
323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e
16 changes: 1 addition & 15 deletions scripts/sync-ggml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,7 @@ cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt
cp -rpv ../ggml/cmake/* ./ggml/cmake/
cp -rpv ../ggml/src/ggml-cpu/cmake/* ./ggml/src/ggml-cpu/cmake/

cp -rpv ../ggml/src/ggml*.c ./ggml/src/
cp -rpv ../ggml/src/ggml*.cpp ./ggml/src/
cp -rpv ../ggml/src/ggml*.h ./ggml/src/
cp -rpv ../ggml/src/gguf*.cpp ./ggml/src/
cp -rpv ../ggml/src/ggml-blas/* ./ggml/src/ggml-blas/
cp -rpv ../ggml/src/ggml-cann/* ./ggml/src/ggml-cann/
cp -rpv ../ggml/src/ggml-cpu/* ./ggml/src/ggml-cpu/
cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/
cp -rpv ../ggml/src/ggml-hip/* ./ggml/src/ggml-hip/
cp -rpv ../ggml/src/ggml-metal/* ./ggml/src/ggml-metal/
cp -rpv ../ggml/src/ggml-musa/* ./ggml/src/ggml-musa/
cp -rpv ../ggml/src/ggml-opencl/* ./ggml/src/ggml-opencl/
cp -rpv ../ggml/src/ggml-rpc/* ./ggml/src/ggml-rpc/
cp -rpv ../ggml/src/ggml-sycl/* ./ggml/src/ggml-sycl/
cp -rpv ../ggml/src/ggml-vulkan/* ./ggml/src/ggml-vulkan/
cp -rpv ../ggml/src/ggml* ./ggml/src/

cp -rpv ../ggml/include/ggml*.h ./ggml/include/
cp -rpv ../ggml/include/gguf*.h ./ggml/include/
Expand Down
Loading