Skip to content

Commit 10640e3

Browse files
authored
ggml : fix interpolate with align-corners and ne=1 (#16700)
* ggml : fix interpolate with align-corners and ne=1
* avoid division by zero if one of the spatial dimensions is 1
* cpu, cuda, opencl returned correct result anyway due to clamp
* vulkan didn't clamp for align-corners so results were broken
* fix clang warning
1 parent 80d28f1 commit 10640e3

File tree

6 files changed

+29
-36
lines changed

6 files changed

+29
-36
lines changed

ggml/src/ggml-cpu/ops.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7519,8 +7519,8 @@ static void ggml_compute_forward_upscale_f32(
75197519
float pixel_offset = 0.5f;
75207520
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
75217521
pixel_offset = 0.0f;
7522-
sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
7523-
sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
7522+
sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
7523+
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
75247524
}
75257525

75267526
for (int64_t i3 = 0; i3 < ne3; i3++) {

ggml/src/ggml-cuda/upscale.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
126126
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
127127
float pixel_offset = 0.5f;
128128
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
129-
sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1);
130-
sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1);
129+
sf0 = dst->ne[0] > 1 && src0->ne[0] > 1 ? (float)(dst->ne[0] - 1) / (src0->ne[0] - 1) : sf0;
130+
sf1 = dst->ne[1] > 1 && src0->ne[1] > 1 ? (float)(dst->ne[1] - 1) / (src0->ne[1] - 1) : sf1;
131131
pixel_offset = 0.0f;
132132
}
133133
upscale_f32_bilinear_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3],

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6156,8 +6156,8 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg
61566156
CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf3));
61576157
} else if (mode == GGML_SCALE_MODE_BILINEAR) {
61586158
if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
6159-
sf0 = (float)(ne0 - 1) / (ne00 - 1);
6160-
sf1 = (float)(ne1 - 1) / (ne01 - 1);
6159+
sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
6160+
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
61616161
pixel_offset = 0.0f;
61626162
}
61636163

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ struct vk_device_struct {
523523
vk_pipeline pipeline_add_id_f32;
524524

525525
vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
526-
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bilinear_ac_f32;
526+
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32;
527527
vk_pipeline pipeline_scale_f32;
528528
vk_pipeline pipeline_sqr_f32;
529529
vk_pipeline pipeline_sqrt_f32;
@@ -1238,6 +1238,7 @@ struct vk_op_upscale_push_constants {
12381238
uint32_t nb00; uint32_t nb01; uint32_t nb02; uint32_t nb03;
12391239
uint32_t ne10; uint32_t ne11; uint32_t ne12; uint32_t ne13;
12401240
float sf0; float sf1; float sf2; float sf3;
1241+
float pixel_offset;
12411242
};
12421243

12431244
struct vk_op_sum_rows_push_constants
@@ -3493,7 +3494,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
34933494

34943495
ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
34953496
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
3496-
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_ac_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS}, 1);
34973497

34983498
ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
34993499

@@ -7798,14 +7798,14 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
77987798
return nullptr;
77997799
case GGML_OP_UPSCALE:
78007800
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
7801-
int mode = ggml_get_op_params_i32(dst, 0);
7801+
ggml_scale_mode mode = (ggml_scale_mode)(ggml_get_op_params_i32(dst, 0) & 0xFF);
78027802
switch (mode) {
78037803
case GGML_SCALE_MODE_NEAREST:
78047804
return ctx->device->pipeline_upscale_nearest_f32;
78057805
case GGML_SCALE_MODE_BILINEAR:
78067806
return ctx->device->pipeline_upscale_bilinear_f32;
7807-
case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS:
7808-
return ctx->device->pipeline_upscale_bilinear_ac_f32;
7807+
default:
7808+
return nullptr;
78097809
}
78107810
}
78117811
return nullptr;
@@ -9294,22 +9294,26 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c
92949294
const uint32_t src0_type_size = ggml_type_size(src0->type);
92959295
const uint32_t mode = (uint32_t)ggml_get_op_params_i32(dst, 0);
92969296

9297-
float sf0 = (float)dst->ne[0] / src0->ne[0];
9298-
float sf1 = (float)dst->ne[1] / src0->ne[1];
9299-
float sf2 = (float)dst->ne[2] / src0->ne[2];
9300-
float sf3 = (float)dst->ne[3] / src0->ne[3];
9297+
GGML_TENSOR_UNARY_OP_LOCALS
9298+
9299+
float sf0 = (float)ne0 / ne00;
9300+
float sf1 = (float)ne1 / ne01;
9301+
float sf2 = (float)ne2 / ne02;
9302+
float sf3 = (float)ne3 / ne03;
9303+
float pixel_offset = 0.5f;
93019304

93029305
if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
9303-
sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1);
9304-
sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1);
9306+
sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
9307+
sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
9308+
pixel_offset = 0.0f;
93059309
}
93069310

93079311
ggml_vk_op_f32<vk_op_upscale_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_UPSCALE, {
93089312
(uint32_t)ggml_nelements(dst), 0, 0,
9309-
(uint32_t)src0->ne[0], (uint32_t)src0->ne[1],
9310-
(uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size,
9311-
(uint32_t)dst->ne[0], (uint32_t)dst->ne[1], (uint32_t)dst->ne[2],(uint32_t)dst->ne[3],
9312-
sf0, sf1, sf2, sf3,
9313+
(uint32_t)ne00, (uint32_t)ne01,
9314+
(uint32_t)nb00 / src0_type_size, (uint32_t)nb01 / src0_type_size, (uint32_t)nb02 / src0_type_size, (uint32_t)nb03 / src0_type_size,
9315+
(uint32_t)ne0, (uint32_t)ne1, (uint32_t)ne2, (uint32_t)ne3,
9316+
sf0, sf1, sf2, sf3, pixel_offset
93139317
}, dryrun);
93149318
}
93159319

ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ layout (push_constant) uniform parameter
77
uint nb00; uint nb01; uint nb02; uint nb03;
88
uint ne10; uint ne11; uint ne12; uint ne13;
99
float sf0; float sf1; float sf2; float sf3;
10+
float pixel_offset;
1011
} p;
1112

1213
#include "types.glsl"
@@ -19,7 +20,6 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
1920
// from ggml.h: enum ggml_scale_mode, enum ggml_scale_flag
2021
#define NEAREST 0
2122
#define BILINEAR 1
22-
#define ALIGN_CORNERS (1 << 8)
2323

2424
layout (constant_id = 0) const uint scale_mode = 0;
2525

@@ -52,7 +52,7 @@ float fetch_bilinear(ivec2 c0, ivec2 c1, vec2 d, uint i12, uint i13) {
5252
float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
5353
const ivec2 ne0 = ivec2(p.ne00, p.ne01);
5454

55-
const vec2 c = (vec2(i10, i11) + 0.5) / vec2(p.sf0, p.sf1) - 0.5;
55+
const vec2 c = (vec2(i10, i11) + p.pixel_offset) / vec2(p.sf0, p.sf1) - p.pixel_offset;
5656
const vec2 c0f = floor(c);
5757
const vec2 d = c - c0f;
5858
const ivec2 c0 = max(ivec2(c0f), 0);
@@ -61,16 +61,6 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
6161
return fetch_bilinear(c0, c1, d, i12, i13);
6262
}
6363

64-
float interpolate_bilinear_align_corners(uint i10, uint i11, uint i12, uint i13) {
65-
const vec2 c = vec2(i10, i11) / vec2(p.sf0, p.sf1);
66-
const vec2 c0f = floor(c);
67-
const vec2 d = c - c0f;
68-
const ivec2 c0 = ivec2(c0f);
69-
const ivec2 c1 = c0 + 1;
70-
71-
return fetch_bilinear(c0, c1, d, i12, i13);
72-
}
73-
7464
void main() {
7565
const uint idx = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
7666

@@ -91,9 +81,6 @@ void main() {
9181
case BILINEAR:
9282
result = interpolate_bilinear(i10, i11, i12, i13);
9383
break;
94-
case BILINEAR | ALIGN_CORNERS:
95-
result = interpolate_bilinear_align_corners(i10, i11, i12, i13);
96-
break;
9784
}
9885

9986
data_d[p.d_offset + idx] = D_TYPE(result);

tests/test-backend-ops.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7049,6 +7049,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
70497049
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
70507050
}
70517051
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
7052+
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
7053+
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
70527054

70537055
test_cases.emplace_back(new test_sum());
70547056
test_cases.emplace_back(new test_sum_rows());

0 commit comments

Comments
 (0)