@@ -523,7 +523,7 @@ struct vk_device_struct {
523523 vk_pipeline pipeline_add_id_f32;
524524
525525 vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
526- vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bilinear_ac_f32 ;
526+ vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32;
527527 vk_pipeline pipeline_scale_f32;
528528 vk_pipeline pipeline_sqr_f32;
529529 vk_pipeline pipeline_sqrt_f32;
@@ -1238,6 +1238,7 @@ struct vk_op_upscale_push_constants {
12381238 uint32_t nb00; uint32_t nb01; uint32_t nb02; uint32_t nb03;
12391239 uint32_t ne10; uint32_t ne11; uint32_t ne12; uint32_t ne13;
12401240 float sf0; float sf1; float sf2; float sf3;
1241+ float pixel_offset;
12411242};
12421243
12431244struct vk_op_sum_rows_push_constants
@@ -3493,7 +3494,6 @@ static void ggml_vk_load_shaders(vk_device& device) {
34933494
34943495 ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
34953496 ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
3496- ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_ac_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS}, 1);
34973497
34983498 ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
34993499
@@ -7798,14 +7798,14 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
77987798 return nullptr;
77997799 case GGML_OP_UPSCALE:
78007800 if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
7801- int mode = ggml_get_op_params_i32(dst, 0);
7801+ ggml_scale_mode mode = (ggml_scale_mode)( ggml_get_op_params_i32(dst, 0) & 0xFF );
78027802 switch (mode) {
78037803 case GGML_SCALE_MODE_NEAREST:
78047804 return ctx->device->pipeline_upscale_nearest_f32;
78057805 case GGML_SCALE_MODE_BILINEAR:
78067806 return ctx->device->pipeline_upscale_bilinear_f32;
7807- case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS :
7808- return ctx->device->pipeline_upscale_bilinear_ac_f32 ;
7807+ default :
7808+ return nullptr ;
78097809 }
78107810 }
78117811 return nullptr;
@@ -9294,22 +9294,26 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c
92949294 const uint32_t src0_type_size = ggml_type_size(src0->type);
92959295 const uint32_t mode = (uint32_t)ggml_get_op_params_i32(dst, 0);
92969296
9297- float sf0 = (float)dst->ne[0] / src0->ne[0];
9298- float sf1 = (float)dst->ne[1] / src0->ne[1];
9299- float sf2 = (float)dst->ne[2] / src0->ne[2];
9300- float sf3 = (float)dst->ne[3] / src0->ne[3];
9297+ GGML_TENSOR_UNARY_OP_LOCALS
9298+
9299+ float sf0 = (float)ne0 / ne00;
9300+ float sf1 = (float)ne1 / ne01;
9301+ float sf2 = (float)ne2 / ne02;
9302+ float sf3 = (float)ne3 / ne03;
9303+ float pixel_offset = 0.5f;
93019304
93029305 if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
9303- sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1);
9304- sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1);
9306+ sf0 = ne0 > 1 && ne00 > 1 ? (float)(ne0 - 1) / (ne00 - 1) : sf0;
9307+ sf1 = ne1 > 1 && ne01 > 1 ? (float)(ne1 - 1) / (ne01 - 1) : sf1;
9308+ pixel_offset = 0.0f;
93059309 }
93069310
93079311 ggml_vk_op_f32<vk_op_upscale_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_UPSCALE, {
93089312 (uint32_t)ggml_nelements(dst), 0, 0,
9309- (uint32_t)src0->ne[0] , (uint32_t)src0->ne[1] ,
9310- (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size,
9311- (uint32_t)dst->ne[0] , (uint32_t)dst->ne[1] , (uint32_t)dst->ne[2], (uint32_t)dst->ne[3] ,
9312- sf0, sf1, sf2, sf3,
9313+ (uint32_t)ne00 , (uint32_t)ne01 ,
9314+ (uint32_t)nb00 / src0_type_size, (uint32_t)nb01 / src0_type_size, (uint32_t)nb02 / src0_type_size, (uint32_t)nb03 / src0_type_size,
9315+ (uint32_t)ne0 , (uint32_t)ne1 , (uint32_t)ne2, (uint32_t)ne3 ,
9316+ sf0, sf1, sf2, sf3, pixel_offset
93139317 }, dryrun);
93149318}
93159319
0 commit comments