@@ -417,6 +417,7 @@ struct vk_device_struct {
417
417
vk_pipeline pipeline_cos_f32;
418
418
vk_pipeline pipeline_clamp_f32;
419
419
vk_pipeline pipeline_pad_f32;
420
+ vk_pipeline pipeline_roll_f32;
420
421
vk_pipeline pipeline_repeat_f32, pipeline_repeat_back_f32;
421
422
vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16, pipeline_cpy_f16_f32, pipeline_cpy_f32_bf16;
422
423
vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16, pipeline_contig_cpy_f16_f32, pipeline_contig_cpy_f32_bf16;
@@ -687,6 +688,37 @@ struct vk_op_unary_push_constants {
687
688
};
688
689
static_assert(sizeof(vk_op_unary_push_constants) <= 128, "sizeof(vk_op_unary_push_constants) must be <= 128");
689
690
691
+ static vk_op_unary_push_constants vk_op_unary_push_constants_init(const ggml_tensor * src0, const ggml_tensor * dst, int64_t ne = 0) {
692
+ GGML_ASSERT(ne != 0 || (ggml_nelements(src0) == ggml_nelements(dst)));
693
+ ne = ne != 0 ? ne : ggml_nelements(dst);
694
+ GGML_ASSERT(ne <= (int64_t)std::numeric_limits<uint32_t>::max());
695
+
696
+ vk_op_unary_push_constants p{};
697
+ p.ne = (uint32_t)ne;
698
+
699
+ size_t src0_tsize = ggml_type_size(src0->type);
700
+ p.ne00 = (uint32_t)src0->ne[0];
701
+ p.ne01 = (uint32_t)src0->ne[1];
702
+ p.ne02 = (uint32_t)src0->ne[2];
703
+ p.ne03 = (uint32_t)src0->ne[3];
704
+ p.nb00 = (uint32_t)(src0->nb[0] / src0_tsize);
705
+ p.nb01 = (uint32_t)(src0->nb[1] / src0_tsize);
706
+ p.nb02 = (uint32_t)(src0->nb[2] / src0_tsize);
707
+ p.nb03 = (uint32_t)(src0->nb[3] / src0_tsize);
708
+
709
+ size_t dst_tsize = ggml_type_size(dst->type);
710
+ p.ne10 = (uint32_t)dst->ne[0];
711
+ p.ne11 = (uint32_t)dst->ne[1];
712
+ p.ne12 = (uint32_t)dst->ne[2];
713
+ p.ne13 = (uint32_t)dst->ne[3];
714
+ p.nb10 = (uint32_t)(dst->nb[0] / dst_tsize);
715
+ p.nb11 = (uint32_t)(dst->nb[1] / dst_tsize);
716
+ p.nb12 = (uint32_t)(dst->nb[2] / dst_tsize);
717
+ p.nb13 = (uint32_t)(dst->nb[3] / dst_tsize);
718
+
719
+ return p; // fastdiv values and offsets are initialized later in ggml_vk_op
720
+ }
721
+
690
722
// See https://gmplib.org/~tege/divcnst-pldi94.pdf figure 4.1.
691
723
// Precompute mp (m' in the paper) and L such that division
692
724
// can be computed using a multiply (high 32b of 64b result)
@@ -2753,6 +2785,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
2753
2785
2754
2786
ggml_vk_create_pipeline(device, device->pipeline_pad_f32, "pad_f32", pad_f32_len, pad_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2755
2787
2788
+ ggml_vk_create_pipeline(device, device->pipeline_roll_f32, "roll_f32", roll_f32_len, roll_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2789
+
2756
2790
ggml_vk_create_pipeline(device, device->pipeline_repeat_f32, "repeat_f32", repeat_f32_len, repeat_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2757
2791
ggml_vk_create_pipeline(device, device->pipeline_repeat_back_f32, "repeat_back_f32", repeat_back_f32_len, repeat_back_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1);
2758
2792
@@ -6425,6 +6459,11 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
6425
6459
return ctx->device->pipeline_pad_f32;
6426
6460
}
6427
6461
return nullptr;
6462
+ case GGML_OP_ROLL:
6463
+ if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
6464
+ return ctx->device->pipeline_roll_f32;
6465
+ }
6466
+ return nullptr;
6428
6467
case GGML_OP_REPEAT:
6429
6468
if (ggml_type_size(src0->type) == sizeof(float) && ggml_type_size(dst->type) == sizeof(float)) {
6430
6469
return ctx->device->pipeline_repeat_f32;
@@ -6965,6 +7004,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
6965
7004
case GGML_OP_COS:
6966
7005
case GGML_OP_CLAMP:
6967
7006
case GGML_OP_PAD:
7007
+ case GGML_OP_ROLL:
6968
7008
case GGML_OP_REPEAT:
6969
7009
case GGML_OP_REPEAT_BACK:
6970
7010
case GGML_OP_CPY:
@@ -7416,117 +7456,60 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c
7416
7456
}
7417
7457
7418
7458
// Record a GGML_OP_SCALE dispatch. The scale factor is taken from the op's
// first float parameter and forwarded via param1.
static void ggml_vk_scale(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    pc.param1 = ggml_get_op_params_f32(dst, 0);

    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SCALE, std::move(pc), dryrun);
}
7432
7464
7433
7465
// Record a GGML_OP_SQR dispatch; only the tensor geometry is needed in the
// push constants.
static void ggml_vk_sqr(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SQR, std::move(pc), dryrun);
}
7446
7468
7447
7469
// Record a GGML_OP_SIN dispatch; only the tensor geometry is needed in the
// push constants.
static void ggml_vk_sin(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SIN, std::move(pc), dryrun);
}
7460
7472
7461
7473
// Record a GGML_OP_COS dispatch; only the tensor geometry is needed in the
// push constants.
static void ggml_vk_cos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_COS, std::move(pc), dryrun);
}
7474
7476
7475
7477
// Record a GGML_OP_CLAMP dispatch. The two clamp bounds come from the op's
// float parameters 0 and 1 and are forwarded via param1/param2.
static void ggml_vk_clamp(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    vk_op_unary_push_constants pc = vk_op_unary_push_constants_init(src0, dst);
    pc.param1 = ggml_get_op_params_f32(dst, 0);
    pc.param2 = ggml_get_op_params_f32(dst, 1);

    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CLAMP, std::move(pc), dryrun);
}
7489
7484
7490
7485
// Record a GGML_OP_PAD dispatch. The element count is taken from dst, which
// can be larger than src0 when padding grows the tensor.
static void ggml_vk_pad(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_PAD,
                   vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)), dryrun);
}
7490
+ static void ggml_vk_roll(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
7491
+ const int32_t s0 = ggml_get_op_params_i32(dst, 0);
7492
+ const int32_t s1 = ggml_get_op_params_i32(dst, 1);
7493
+ const int32_t s2 = ggml_get_op_params_i32(dst, 2);
7494
+ const int32_t s3 = ggml_get_op_params_i32(dst, 3);
7495
+ const uint32_t s01_packed = ((s0 + 0x8000) << 16) | (s1 + 0x8000);
7496
+ const uint32_t s23_packed = ((s2 + 0x8000) << 16) | (s3 + 0x8000);
7497
+
7498
+ vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst);
7499
+ memcpy(&p.param1, &s01_packed, sizeof(float));
7500
+ memcpy(&p.param2, &s23_packed, sizeof(float));
7501
+
7502
+ ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_ROLL, std::move(p), dryrun);
7502
7503
}
7503
7504
7504
7505
// Record a GGML_OP_REPEAT dispatch. The element count is taken from dst,
// which holds the repeated (larger) tensor.
static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT,
                   vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)), dryrun);
}
7517
7509
7518
7510
// Record a GGML_OP_REPEAT_BACK dispatch. The element count is taken from dst.
static void ggml_vk_repeat_back(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
    ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT_BACK,
                   vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)), dryrun);
}
7531
7514
7532
7515
static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) {
@@ -7544,14 +7527,8 @@ static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const
7544
7527
}
7545
7528
}
7546
7529
7547
- ggml_vk_op_f32<vk_op_unary_push_constants>(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, {
7548
- ne,
7549
- (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size,
7550
- (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size,
7551
- 0,
7552
- 0.0f, 0.0f,
7553
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7554
- }, dryrun);
7530
+ vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst, ne);
7531
+ ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, std::move(p), dryrun);
7555
7532
}
7556
7533
7557
7534
static void ggml_vk_silu_back(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) {
@@ -8862,6 +8839,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
8862
8839
case GGML_OP_COS:
8863
8840
case GGML_OP_CLAMP:
8864
8841
case GGML_OP_PAD:
8842
+ case GGML_OP_ROLL:
8865
8843
case GGML_OP_CPY:
8866
8844
case GGML_OP_CONT:
8867
8845
case GGML_OP_DUP:
@@ -9031,6 +9009,10 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
9031
9009
case GGML_OP_PAD:
9032
9010
ggml_vk_pad(ctx, compute_ctx, src0, node, dryrun);
9033
9011
9012
+ break;
9013
+ case GGML_OP_ROLL:
9014
+ ggml_vk_roll(ctx, compute_ctx, src0, node, dryrun);
9015
+
9034
9016
break;
9035
9017
case GGML_OP_CPY:
9036
9018
case GGML_OP_CONT:
@@ -9247,6 +9229,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor *
9247
9229
case GGML_OP_COS:
9248
9230
case GGML_OP_CLAMP:
9249
9231
case GGML_OP_PAD:
9232
+ case GGML_OP_ROLL:
9250
9233
case GGML_OP_CPY:
9251
9234
case GGML_OP_CONT:
9252
9235
case GGML_OP_DUP:
@@ -10368,6 +10351,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
10368
10351
case GGML_OP_CONCAT:
10369
10352
case GGML_OP_SCALE:
10370
10353
case GGML_OP_PAD:
10354
+ case GGML_OP_ROLL:
10371
10355
case GGML_OP_DIAG_MASK_INF:
10372
10356
case GGML_OP_SOFT_MAX:
10373
10357
case GGML_OP_SOFT_MAX_BACK:
0 commit comments