Skip to content

Commit 6d84cbb

Browse files
committed
Fix parameter order in conv2d_implicit and add comprehensive test cases for 2D convolution
1 parent 3877608 commit 6d84cbb

File tree

2 files changed, with 25 additions (+25) and 1 deletion (-1).

ggml/src/ggml-cuda/conv2d-implicit.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -355,7 +355,7 @@ void ggml_cuda_op_conv2d_implicit(ggml_backend_cuda_context & ctx, ggml_tensor *
355 355

356 356
const int64_t total = B * OC * OH * OW;
357 357

358-
param_t params = { B, IC, IH, IW, OC, KH, KW, ST_X, ST_Y, PD_X, PD_Y, DL_X, DL_Y, OH, OW };
358+
param_t params = { B, IC, IH, IW, OC, KH, KW, ST_Y, ST_X, PD_Y, PD_X, DL_Y, DL_X, OH, OW };
359 359

360 360
if (kernel->type == GGML_TYPE_F16) {
361 361
conv2d_implicit_cuda_f16(X_D, (half *) K_D, Y_D, params, st);

tests/test-backend-ops.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5790,6 +5790,30 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
5790 5790
}
5791 5791
}
5792 5792

5793+
for (uint32_t s0 : { 1, 3 }) {
5794+
for (uint32_t p1 : { 2, 5 }) {
5795+
for (uint32_t Cin : { 1, 25 }) {
5796+
for (uint32_t Cout : { 1, 12 }) {
5797+
for (uint32_t KH : { 1, 2, 3, 11 }) {
5798+
for (uint32_t KW : { 1, 2, 3, 11 }) {
5799+
for (uint32_t H : { 1, 133 }) {
5800+
for (uint32_t W : { 1, 141 }) {
5801+
if (calc_conv_output_size(W, KW, s0, p0, d0) > 0 &&
5802+
calc_conv_output_size(H, KH, s1, p1, d1) > 0) {
5803+
for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
5804+
test_cases.emplace_back(new test_conv_2d_implicit(
5805+
{ W, H, Cin, 2 }, { KW, KH, Cin, Cout }, kernel_type, s0, s1, p0, p1, d0, d1, false));
5806+
}
5807+
}
5808+
}
5809+
}
5810+
}
5811+
}
5812+
}
5813+
}
5814+
}
5815+
}
5816+
5793 5817
// sycl backend will limit task global_range < MAX_INT
5794 5818
// test cases for 2D im2col with large input W and H (occurs in stable-diffusion)
5795 5819
// however these cases need to alloc more memory which may fail in some devices (Intel Arc770, etc.)

0 commit comments

Comments
 (0)