ggml-cpu : "align corners" for bilinear upscale/downscale (#1285)

Acly · web-flow · commit 67ad436cb653 · 2025-07-01T09:11:00.000+02:00
* add "align corners" mode for bilinear upscale, and allow downscaling
* add ggml_interpolate, deprecate ggml_upscale_ext, pass in align-corners as bit-flag
* test-backend-ops: replace ggml_upscale_ext with ggml_interpolate, add test cases for downscale and align-corners
diff --git a/include/ggml.h b/include/ggml.h
@@ -1765,6 +1765,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT // number of scale modes; ggml_interpolate_impl asserts (mode & 0xFF) < GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8) // OR-ed with a ggml_scale_mode; sits above the low 8 bits that carry the mode
     };
 
     // interpolate
@@ -1777,14 +1783,26 @@ extern "C" {
 
     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   ne0,
             int                   ne1,
             int                   ne2,
             int                   ne3,
-            enum ggml_scale_mode  mode);
+            enum ggml_scale_mode  mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input `a` to the specified size (ne0, ne1, ne2, ne3); the target may be smaller or larger than `a`.
+    // 2D scale modes (e.g. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3,
+            uint32_t              mode); // ggml_scale_mode in the low 8 bits [ | ggml_scale_flag...], e.g. GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
diff --git a/src/ggml-cpu/ops.cpp b/src/ggml-cpu/ops.cpp
@@ -6608,12 +6608,13 @@ static void ggml_compute_forward_upscale_f32(
 
     GGML_TENSOR_UNARY_OP_LOCALS
 
-    const float sf0 = (float)ne0/src0->ne[0];
-    const float sf1 = (float)ne1/src0->ne[1];
-    const float sf2 = (float)ne2/src0->ne[2];
-    const float sf3 = (float)ne3/src0->ne[3];
+    float sf0 = (float)ne0/src0->ne[0];
+    float sf1 = (float)ne1/src0->ne[1];
+    float sf2 = (float)ne2/src0->ne[2];
+    float sf3 = (float)ne3/src0->ne[3];
 
-    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
+    const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
+    const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
 
     if (mode == GGML_SCALE_MODE_NEAREST) {
         for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -6634,8 +6635,12 @@ static void ggml_compute_forward_upscale_f32(
             }
         }
     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
-        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
-        const float pixel_offset = 0.5f;
+        float pixel_offset = 0.5f;
+        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
+            pixel_offset = 0.0f;
+            sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
+            sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
+        }
 
         for (int64_t i3 = 0; i3 < ne3; i3++) {
             const int64_t i03 = i3 / sf3;
diff --git a/src/ggml.c b/src/ggml.c
@@ -4247,24 +4247,21 @@ struct ggml_tensor * ggml_pool_2d_back(
     return result;
 }
 
-// ggml_upscale
+// ggml_upscale / ggml_interpolate
 
-static struct ggml_tensor * ggml_upscale_impl(
+static struct ggml_tensor * ggml_interpolate_impl(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        int                   ne0,
-        int                   ne1,
-        int                   ne2,
-        int                   ne3,
-        enum ggml_scale_mode  mode) {
-    GGML_ASSERT(a->ne[0] <= ne0);
-    GGML_ASSERT(a->ne[1] <= ne1);
-    GGML_ASSERT(a->ne[2] <= ne2);
-    GGML_ASSERT(a->ne[3] <= ne3);
-
+        int64_t               ne0,
+        int64_t               ne1,
+        int64_t               ne2,
+        int64_t               ne3,
+        uint32_t              mode) {
+    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
+    
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
 
-    ggml_set_op_params_i32(result, 0, mode);
+    ggml_set_op_params_i32(result, 0, (int32_t)mode);
 
     result->op     = GGML_OP_UPSCALE;
     result->src[0] = a;
@@ -4277,7 +4274,8 @@ struct ggml_tensor * ggml_upscale(
         struct ggml_tensor  * a,
         int                   scale_factor,
         enum ggml_scale_mode  mode) {
-    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
+    GGML_ASSERT(scale_factor > 1);
+    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
 }
 
 struct ggml_tensor * ggml_upscale_ext(
@@ -4288,7 +4286,18 @@ struct ggml_tensor * ggml_upscale_ext(
         int                   ne2,
         int                   ne3,
         enum ggml_scale_mode  mode) {
-    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+}
+
+// Public entry point for interpolation: hands every argument, unchanged,
+// to ggml_interpolate_impl and returns the tensor it produces.
+struct ggml_tensor * ggml_interpolate(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int64_t               ne0,
+        int64_t               ne1,
+        int64_t               ne2,
+        int64_t               ne3,
+        uint32_t              mode) {
+    struct ggml_tensor * interpolated = ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
+
+    return interpolated;
+}
 
 // ggml_pad
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -326,4 +326,13 @@ if (NOT GGML_BACKEND_DL)
     target_link_libraries(${TEST_TARGET} PRIVATE ggml)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
     set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
+
+    #
+    # test-interpolate
+
+    set(TEST_TARGET test-interpolate)
+    add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp)
+    target_link_libraries(${TEST_TARGET} PRIVATE ggml)
+    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}>)
+    set_property(TEST ${TEST_TARGET} PROPERTY ENVIRONMENT "LLVM_PROFILE_FILE=${TEST_TARGET}.profraw")
 endif()
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
@@ -3066,28 +3066,28 @@ struct test_upscale : public test_case {
     }
 };
 
-// GGML_OP_UPSCALE (ext)
-struct test_upscale_ext : public test_case {
+// GGML_OP_UPSCALE (via ggml_interpolate)
+struct test_interpolate : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
     const std::array<int64_t, 4> ne_tgt;
-    const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
+    const uint32_t mode = GGML_SCALE_MODE_NEAREST;
 
     std::string vars() override {
         return VARS_TO_STR4(type, ne, ne_tgt, mode);
     }
 
-    test_upscale_ext(ggml_type type = GGML_TYPE_F32,
+    test_interpolate(ggml_type type = GGML_TYPE_F32,
             std::array<int64_t, 4> ne     = {2, 5,  7, 11},
             std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
-            ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
+            uint32_t mode = GGML_SCALE_MODE_NEAREST)
         : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_name(a, "a");
 
-        ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
+        ggml_tensor * out = ggml_interpolate(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
         ggml_set_name(out, "out");
 
         return out;
@@ -4521,8 +4521,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
         test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
         test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
-        test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5,  7, 11}, {5, 7, 11, 13}, mode));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5,  7, 11}, {5, 7, 11, 13}, mode));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5,  7, 11}, mode));
     }
+    test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5,  7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
 
     test_cases.emplace_back(new test_sum());
     test_cases.emplace_back(new test_sum_rows());
diff --git a/tests/test-interpolate.cpp b/tests/test-interpolate.cpp
@@ -0,0 +1,166 @@
+#include <ggml.h>
+#include <ggml-cpu.h>
+#include <ggml-alloc.h>
+#include <ggml-backend.h>
+#include <ggml-cpp.h>
+
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <array>
+#include <vector>
+
+// Compares the first n elements of `result` against `expected`.
+// Returns true when every pair differs by at most 1e-4. On the first pair that
+// does not, prints the index and both values and returns false.
+// The comparison is written as !(diff <= tol) so that a NaN difference, for
+// which every ordered comparison is false, is reported as a mismatch instead
+// of being silently accepted.
+bool check_equal(const float * result, const float * expected, int64_t n) {
+    // index has the same type as n, so the loop stays well-defined for any n
+    for (int64_t i = 0; i < n; i++) {
+        const float diff = std::abs(result[i] - expected[i]);
+        if (!(diff <= 1e-4)) {
+            printf("result[%lld] %f != %f expected[%lld]\n", (long long) i, result[i], expected[i], (long long) i);
+            return false;
+        }
+    }
+    return true;
+}
+
+// Runs a single ggml_interpolate case on the CPU backend and checks its output.
+//   name     - label printed in the PASSED/FAILED report line
+//   src_ne   - dimensions of the source tensor that src_data is uploaded into
+//   dst_ne   - target dimensions handed to ggml_interpolate
+//   expected - reference values, compared element by element via check_equal
+//   mode     - passed through to ggml_interpolate as-is
+// Returns the verdict of check_equal.
+bool test_interpolate(char const* name,
+                      std::array<int64_t, 4> src_ne, const float * src_data,
+                      std::array<int32_t, 4> dst_ne, const float * expected,
+                      uint32_t mode) {
+    ggml_time_init();
+
+    // The context is created with no_alloc set; buffers for its tensors are
+    // requested from the backend further down.
+    ggml_init_params init_params {
+        /*.mem_size   =*/ 64 * ggml_tensor_overhead() + ggml_graph_overhead(),
+        /*.mem_buffer =*/ NULL,
+        /*.no_alloc   =*/ true
+    };
+    ggml_context_ptr context{ggml_init(init_params)};
+    ggml_cgraph * graph = ggml_new_graph(context.get());
+
+    // Graph: input -> interpolate -> output
+    ggml_tensor * input  = ggml_new_tensor(context.get(), GGML_TYPE_F32, 4, src_ne.data());
+    ggml_tensor * output = ggml_interpolate(context.get(), input, dst_ne[0], dst_ne[1], dst_ne[2], dst_ne[3], mode);
+    ggml_build_forward_expand(graph, output);
+
+    // CPU backend configured for 2 threads, then buffers for the context's tensors
+    ggml_backend_ptr cpu_backend{ggml_backend_cpu_init()};
+    ggml_backend_cpu_set_n_threads(cpu_backend.get(), 2);
+    ggml_backend_buffer_ptr tensor_buffer{ggml_backend_alloc_ctx_tensors(context.get(), cpu_backend.get())};
+
+    // Upload the source data and run the graph
+    ggml_backend_tensor_set(input, src_data, 0, ggml_nbytes(input));
+    ggml_backend_graph_compute(cpu_backend.get(), graph);
+
+    // Read the output back and compare it with the reference values
+    const int64_t n_out = ggml_nelements(output);
+    std::vector<float> actual(n_out);
+    ggml_backend_tensor_get(output, actual.data(), 0, ggml_nbytes(output));
+
+    const bool passed = check_equal(actual.data(), expected, n_out);
+
+    const char * verdict = passed ? "\033[32mPASSED\033[0m" : "\033[31mFAILED\033[0m";
+    printf("ggml_interpolate(%s): %s\n", name, verdict);
+    return passed;
+}
+
+const float input_upscale[] = { // source for all upscale cases in main, used with ne = {2, 2, 1, 1}
+    0.0f, 1.0f,
+    2.0f, 4.0f
+};
+
+const float expected_upscale_x2_nearest[] = { // reference for target ne = {4, 4, 1, 1}, GGML_SCALE_MODE_NEAREST
+    0.0f, 0.0f, 1.0f, 1.0f,
+    0.0f, 0.0f, 1.0f, 1.0f,
+    2.0f, 2.0f, 4.0f, 4.0f,
+    2.0f, 2.0f, 4.0f, 4.0f
+};
+
+const float expected_upscale_x2_bilinear[] = { // reference for target ne = {4, 4, 1, 1}, GGML_SCALE_MODE_BILINEAR
+    0.0f, 0.2500f, 0.7500f, 1.00f,
+    0.5f, 0.8125f, 1.4375f, 1.75f,
+    1.5f, 1.9375f, 2.8125f, 3.25f,
+    2.0f, 2.5000f, 3.5000f, 4.00f
+};
+
+const float expected_upscale_x2_bilinear_align_corners[] = { // target ne = {4, 4, 1, 1}, bilinear with GGML_SCALE_FLAG_ALIGN_CORNERS
+    0.0000f, 0.3333f, 0.6667f, 1.0000f,
+    0.6667f, 1.1111f, 1.5556f, 2.0000f,
+    1.3333f, 1.8889f, 2.4444f, 3.0000f,
+    2.0000f, 2.6667f, 3.3333f, 4.0000f
+};
+
+const float expected_upscale_x1_5_bilinear_align_corners[] = { // target ne = {2, 3, 1, 1}: only the second dimension grows (2 -> 3)
+    0.0f, 1.0f,
+    1.0f, 2.5f,
+    2.0f, 4.0f
+};
+
+const float input_downscale[] = { // source for all downscale cases in main, used with ne = {4, 3, 2, 1}; the blank line separates the two ne2 slices
+    0.0f, -1.0f, -2.0f, 0.0f,
+    1.0f, 2.0f , 4.0f , 4.0f,
+    2.0f, 2.0f , 1.0f , 1.0f,
+
+    1.0f, 2.0f , 3.0f , 4.0f,
+    2.0f, 2.0f , 2.0f , 2.0f,
+    -2.0f, 2.0f, -4.0f, 4.0f
+};
+
+const float expected_downscale_nearest[] = { // reference for target ne = {2, 1, 2, 1}, GGML_SCALE_MODE_NEAREST
+    0.0f, -2.0f,
+
+    1.0f, 3.0f
+};
+
+const float expected_downscale_bilinear[] = { // reference for target ne = {3, 2, 2, 1}, GGML_SCALE_MODE_BILINEAR
+    0.1667f, -0.3750f,  0.7500f,
+    1.7917f,  1.8750f,  1.7500f,
+
+    1.3750f,  2.3750f,  3.3750f,
+   -0.5000f, -0.2500f,  2.5000f
+};
+
+const float expected_downscale_bilinear_align_corners[] = { // target ne = {3, 2, 2, 1}, bilinear with GGML_SCALE_FLAG_ALIGN_CORNERS
+    0.0f , -1.5f, 0.0f,
+    2.0f ,  1.5f, 1.0f,
+
+    1.0f ,  2.5f, 4.0f,
+    -2.0f, -1.0f, 4.0f
+};
+
+// Runs every interpolation case in order and exits with 0 only when all of
+// them passed, 1 otherwise.
+int main() {
+    // One row per call to test_interpolate; fields mirror its parameter list.
+    struct interpolate_case {
+        char const *           name;
+        std::array<int64_t, 4> src_ne;
+        const float *          src_data;
+        std::array<int32_t, 4> dst_ne;
+        const float *          expected;
+        uint32_t               mode;
+    };
+
+    const uint32_t nearest          = GGML_SCALE_MODE_NEAREST;
+    const uint32_t bilinear         = GGML_SCALE_MODE_BILINEAR;
+    const uint32_t bilinear_aligned = GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS;
+
+    const interpolate_case cases[] = {
+        { "upscale_x2_nearest",
+          {2, 2, 1, 1}, input_upscale,
+          {4, 4, 1, 1}, expected_upscale_x2_nearest,
+          nearest },
+        { "upscale_x2_bilinear",
+          {2, 2, 1, 1}, input_upscale,
+          {4, 4, 1, 1}, expected_upscale_x2_bilinear,
+          bilinear },
+        { "upscale_x2_bilinear_align_corners",
+          {2, 2, 1, 1}, input_upscale,
+          {4, 4, 1, 1}, expected_upscale_x2_bilinear_align_corners,
+          bilinear_aligned },
+        { "upscale_x1_5_bilinear_align_corners",
+          {2, 2, 1, 1}, input_upscale,
+          {2, 3, 1, 1}, expected_upscale_x1_5_bilinear_align_corners,
+          bilinear_aligned },
+        { "downscale_nearest",
+          {4, 3, 2, 1}, input_downscale,
+          {2, 1, 2, 1}, expected_downscale_nearest,
+          nearest },
+        { "downscale_bilinear",
+          {4, 3, 2, 1}, input_downscale,
+          {3, 2, 2, 1}, expected_downscale_bilinear,
+          bilinear },
+        { "downscale_bilinear_align_corners",
+          {4, 3, 2, 1}, input_downscale,
+          {3, 2, 2, 1}, expected_downscale_bilinear_align_corners,
+          bilinear_aligned },
+    };
+
+    bool all_passed = true;
+    for (const interpolate_case & c : cases) {
+        // Call first, combine afterwards: every case runs and prints its own
+        // result even after an earlier failure.
+        const bool ok = test_interpolate(c.name, c.src_ne, c.src_data, c.dst_ne, c.expected, c.mode);
+        all_passed = all_passed && ok;
+    }
+
+    if (all_passed) {
+        return 0;
+    }
+    return 1;
+}