
Commit d3b7e04

Merge branch 'ggml-org:master' into master

2 parents 1b37cce + ad51c0a


45 files changed: +4579 -4673 lines

.github/labeler.yml

Lines changed: 4 additions & 0 deletions

@@ -76,6 +76,10 @@ ggml:
   - changed-files:
       - any-glob-to-any-file:
           - ggml/**
+model:
+  - changed-files:
+      - any-glob-to-any-file:
+          - src/models/**
 nix:
   - changed-files:
       - any-glob-to-any-file:

README.md

Lines changed: 3 additions & 4 deletions

@@ -17,14 +17,13 @@ LLM inference in C/C++

 ## Hot topics

-- **[guide : running gpt-oss with llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/15396)**
-- **[[FEEDBACK] Better packaging for llama.cpp to support downstream consumers 🤗](https://github.com/ggml-org/llama.cpp/discussions/15313)**
+- **[guide : using the new WebUI of llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/16938)**
+- [guide : running gpt-oss with llama.cpp](https://github.com/ggml-org/llama.cpp/discussions/15396)
+- [[FEEDBACK] Better packaging for llama.cpp to support downstream consumers 🤗](https://github.com/ggml-org/llama.cpp/discussions/15313)
 - Support for the `gpt-oss` model with native MXFP4 format has been added | [PR](https://github.com/ggml-org/llama.cpp/pull/15091) | [Collaboration with NVIDIA](https://blogs.nvidia.com/blog/rtx-ai-garage-openai-oss) | [Comment](https://github.com/ggml-org/llama.cpp/discussions/15095)
-- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen)
 - Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
 - VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode
 - Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim
-- Introducing GGUF-my-LoRA https://github.com/ggml-org/llama.cpp/discussions/10123
 - Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggml-org/llama.cpp/discussions/9669
 - Hugging Face GGUF editor: [discussion](https://github.com/ggml-org/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor)

ggml/include/ggml.h

Lines changed: 1 addition & 0 deletions

@@ -2108,6 +2108,7 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+        GGML_SCALE_MODE_BICUBIC  = 2,

         GGML_SCALE_MODE_COUNT
     };
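For orientation, the CPU implementation below unpacks this mode from the low byte of the op's first parameter and treats the higher bits as flags such as align-corners. A minimal standalone sketch of that packing convention; the names and the flag's bit position are local assumptions for illustration, not the ggml API:

```cpp
// Illustration only: mirrors how the CPU op below reads the scale mode from
// the low byte of mode_flags (mode_flags & 0xFF) and a flag from the higher
// bits. Names and the flag's bit position are assumptions, not ggml symbols.
#include <cstdint>
#include <cstdio>

enum scale_mode : int32_t { NEAREST = 0, BILINEAR = 1, BICUBIC = 2 };
constexpr int32_t ALIGN_CORNERS = 1 << 8; // assumed flag bit above the mode byte

int main() {
    const int32_t mode_flags = BICUBIC | ALIGN_CORNERS;            // packed into one int32
    const auto    mode       = (scale_mode)(mode_flags & 0xFF);    // low byte -> mode
    const bool    align      = (mode_flags & ALIGN_CORNERS) != 0;  // higher bits -> flags
    std::printf("mode=%d align_corners=%d\n", (int) mode, align);
}
```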

ggml/src/ggml-cpu/ops.cpp

Lines changed: 52 additions & 7 deletions

@@ -7507,10 +7507,17 @@ static void ggml_compute_forward_upscale_f32(
     float sf1 = (float)ne1/src0->ne[1];
     float sf2 = (float)ne2/src0->ne[2];
     float sf3 = (float)ne3/src0->ne[3];
+    float pixel_offset = 0.5f;

     const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
     const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);

+    if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+        pixel_offset = 0.0f;
+        sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
+        sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
+    }
+
     if (mode == GGML_SCALE_MODE_NEAREST) {
         for (int64_t i3 = 0; i3 < ne3; i3++) {
             const int64_t i03 = i3 / sf3;
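The pixel offset and the rescaled sf0/sf1 above implement the two usual coordinate conventions: the default half-pixel mapping src = (dst + 0.5) / sf - 0.5, and align-corners, where the first and last destination samples land exactly on the first and last source samples. A small standalone sketch of just that mapping, using local names rather than the ggml tensors above:

```cpp
// Illustration only: source-coordinate mapping for the default (half-pixel)
// and align-corners conventions, mirroring the pixel_offset / sf adjustment
// made in the hunk above. Variable names here are local to the example.
#include <cstdio>

int main() {
    const int ne_src = 4, ne_dst = 8;                    // upscale 4 -> 8 along one axis

    // default: sf = ne_dst / ne_src, pixel_offset = 0.5
    float sf           = (float) ne_dst / ne_src;
    float pixel_offset = 0.5f;

    // align corners: sf = (ne_dst - 1) / (ne_src - 1), pixel_offset = 0
    float sf_ac           = (float)(ne_dst - 1) / (ne_src - 1);
    float pixel_offset_ac = 0.0f;

    for (int i = 0; i < ne_dst; i++) {
        const float x    = ((float) i + pixel_offset)    / sf    - pixel_offset;     // half-pixel
        const float x_ac = ((float) i + pixel_offset_ac) / sf_ac - pixel_offset_ac;  // align corners
        std::printf("dst %d -> src %.3f (half-pixel), %.3f (align corners)\n", i, x, x_ac);
    }
    // With align corners, dst 0 maps to src 0 and dst ne_dst-1 maps to src ne_src-1 exactly.
}
```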
@@ -7530,13 +7537,6 @@
             }
         }
     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        float pixel_offset = 0.5f;
-        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
-            pixel_offset = 0.0f;
-            sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
-            sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
-        }
-
         for (int64_t i3 = 0; i3 < ne3; i3++) {
             const int64_t i03 = i3 / sf3;
             for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
@@ -7571,6 +7571,51 @@

                     const float val = a*(1 - dx)*(1 - dy) + b*dx*(1 - dy) + c*(1 - dx)*dy + d*dx*dy;

+                    float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+                    *y_dst = val;
+                }
+            }
+        }
+    }
+    } else if (mode == GGML_SCALE_MODE_BICUBIC) {
+        // https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
+        const float a = -0.75f; // use alpha = -0.75 (same as PyTorch)
+        auto weight1 = [a](float x) { return ((a + 2) * x - (a + 3)) * x * x + 1; };
+        auto weight2 = [a](float x) { return ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; };
+        auto bicubic = [=](float p0, float p1, float p2, float p3, float x) {
+            const float w0 = weight2(x + 1);
+            const float w1 = weight1(x + 0);
+            const float w2 = weight1(1 - x);
+            const float w3 = weight2(2 - x);
+            return p0*w0 + p1*w1 + p2*w2 + p3*w3;
+        };
+
+        for (int64_t i3 = 0; i3 < ne3; i3++) {
+            const int64_t i03 = i3 / sf3;
+            for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+                const int64_t i02 = i2 / sf2;
+                for (int64_t i1 = 0; i1 < ne1; i1++) {
+                    const float y = ((float)i1 + pixel_offset) / sf1 - pixel_offset;
+                    const int64_t y0 = (int64_t)floorf(y);
+                    const float dy = y - (float)y0;
+
+                    for (int64_t i0 = 0; i0 < ne0; i0++) {
+                        const float x = ((float)i0 + pixel_offset) / sf0 - pixel_offset;
+                        const int64_t x0 = (int64_t)floorf(x);
+                        const float dx = x - (float)x0;
+
+                        auto p = [=](int64_t x_off, int64_t y_off) -> float {
+                            int64_t i00 = std::max(int64_t(0), std::min(x0 + x_off, ne00 - 1));
+                            int64_t i01 = std::max(int64_t(0), std::min(y0 + y_off, ne01 - 1));
+                            return *(const float *)((const char *)src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                        };
+
+                        const float val = bicubic(
+                            bicubic(p(-1,-1), p(0,-1), p(1,-1), p(2,-1), dx),
+                            bicubic(p(-1, 0), p(0, 0), p(1, 0), p(2, 0), dx),
+                            bicubic(p(-1, 1), p(0, 1), p(1, 1), p(2, 1), dx),
+                            bicubic(p(-1, 2), p(0, 2), p(1, 2), p(2, 2), dx), dy);
+
                     float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
                     *y_dst = val;
                 }
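The new GGML_SCALE_MODE_BICUBIC branch evaluates the bicubic convolution kernel (the Wikipedia reference cited in the diff) with alpha = -0.75, the same value PyTorch uses, applying it along x for four neighbouring rows and then along y, with reads clamped at the image border. A self-contained sketch of the same 1D kernel, kept separate from the ggml code above; the names are local to the example:

```cpp
// Illustration only: the 1D cubic convolution kernel used by the new
// GGML_SCALE_MODE_BICUBIC branch (Keys' kernel, alpha = -0.75), applied to
// four neighbouring samples. The 2D op applies it along x, then along y.
#include <cstdio>

static const float a = -0.75f; // same alpha as the patch / PyTorch

// same polynomials as weight1/weight2 in the diff:
// weight1 covers |t| <= 1, weight2 covers 1 < |t| <= 2
static float weight1(float x) { return ((a + 2) * x - (a + 3)) * x * x + 1; }
static float weight2(float x) { return ((a * x - 5 * a) * x + 8 * a) * x - 4 * a; }

// interpolate between p1 and p2 at fractional position x in [0, 1]
static float cubic1d(float p0, float p1, float p2, float p3, float x) {
    const float w0 = weight2(x + 1);
    const float w1 = weight1(x + 0);
    const float w2 = weight1(1 - x);
    const float w3 = weight2(2 - x);
    return p0*w0 + p1*w1 + p2*w2 + p3*w3; // w0 + w1 + w2 + w3 == 1 for any x
}

int main() {
    // samples 1,2,3,4: at x = 0 the result is exactly p1 (= 2), at x = 1 it is p2 (= 3)
    for (float x = 0.0f; x < 1.001f; x += 0.25f) {
        std::printf("x = %.2f -> %.4f\n", x, cubic1d(1.0f, 2.0f, 3.0f, 4.0f, x));
    }
}
```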
