Skip to content

Commit a5b68bc

Browse files
committed
conv3D WIP: fixed a launch param bug, results now correct; performance 3x slower than im2col
1 parent e802036 commit a5b68bc

File tree

2 files changed

+23
-21
lines changed

2 files changed

+23
-21
lines changed

ggml/src/ggml-cuda/conv3d-implicit.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ static void conv3d_implicit_cuda(const float * X_D, const T * K_D, float * Y_D,
10071007
const uint TM = conv_shapes[6][CONV_SHAPE];
10081008
const uint TN = conv_shapes[7][CONV_SHAPE];
10091009
const uint NUM_THREADS = conv_shapes[8][CONV_SHAPE];
1010-
int blockx = ((P.Oh * P.Ow + BM - 1) / BM); // blockx number
1010+
int blockx = ((P.Od * P.Oh * P.Ow + BM - 1) / BM); // blockx number
10111011
int blocky = (P.k + BN-1) / BN; // blocky number
10121012
int blockz = P.n; // blockz number
10131013
int thready = 1; // thready number per block

tests/test-conv3d.cpp

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ struct ggml_cgraph * build_graph_1(const test_model& model, const int64_t ic, co
241241
ic, n, oc);
242242
ggml_set_name(wino_res, "wino_res");
243243
ggml_build_forward_expand(gf, wino_res);
244-
// ne = wino_res->ne;
244+
// int64_t *ne = wino_res->ne;
245245
// printf("wino: (%zu, %zu, %zu, %zu) \n", ne[0], ne[1], ne[2], ne[3]);
246246
ggml_free(ctx0);
247247
return gf;
@@ -323,9 +323,13 @@ int main(void)
323323
// std::make_tuple(960,320,104,152,3,3),
324324
// std::make_tuple(1280,1280,26,38,3,3),
325325
std::make_tuple(320,1280,26,38,8,3,3,3),
326-
// std::make_tuple(1280,1280,26,38,8,3,3,3),
327-
// std::make_tuple(320,1280,52,76,8,3,3,3),
328-
// std::make_tuple(1280,1280,52,76,8,3,3,3),
326+
std::make_tuple(1280,1280,26,38,8,3,3,3),
327+
std::make_tuple(320,1280,52,76,8,3,3,3),
328+
std::make_tuple(1280,1280,52,76,8,3,3,3),
329+
std::make_tuple(320,1280,104,152,8,3,3,3),
330+
std::make_tuple(1280,1280,104,152,8,3,3,3),
331+
std::make_tuple(320,1280,208,304,4,3,3,3),
332+
std::make_tuple(640,1280,208,304,4,3,3,3),
329333
// std::make_tuple(1280,1280,26,38,1,1),
330334
// std::make_tuple(256,128,768,1024,3,3),
331335
// std::make_tuple(128,3,768,1024,3,3),
@@ -393,29 +397,27 @@ int main(void)
393397

394398
if(k==0) {
395399
k = 1;
396-
fprintf(stderr, "| (IC, OC, IW, IH, KW, KH) | im2col+GEMM TIME | im2col+GEMM VRAM | implicit GEMM TIME | implicit GEMM VRAM \n");
400+
fprintf(stderr, "| (IC, OC, IW, IH, ID, KW, KH, KD) | im2col+GEMM TIME | im2col+GEMM VRAM | implicit GEMM TIME | implicit GEMM VRAM \n");
397401
fprintf(stderr, "| --- | --- | --- | --- | --- \n");
398402
}
399403

400-
fprintf(stderr, " | (%d, %d, %d, %d, %d, %d) | %.2f ms | %.2f MB | %.2f ms | %.2f MB\n",
401-
std::get<0>(c), std::get<1>(c), std::get<2>(c), std::get<3>(c), std::get<4>(c), std::get<5>(c),
404+
fprintf(stderr, " | (%d, %d, %d, %d, %d, %d, %d, %d) | %.2f ms | %.2f MB | %.2f ms | %.2f MB\n",
405+
std::get<0>(c), std::get<1>(c), std::get<2>(c),
406+
std::get<3>(c), std::get<4>(c), std::get<5>(c),
407+
std::get<6>(c), std::get<7>(c),
402408
run_time0, mem_size0/1024.0f/1024.0f,
403409
run_time1, mem_size1/1024.0f/1024.0f);
404410

405411

406-
// for(int i = 0; i < ggml_nelements(wino_res); i++) {
407-
// for(int i = 0; i < 26*38; i++) {
408-
for(int i = 0; i < conv2d_data.size(); i++) {
409-
// float diff = fabs(conv2d_data[i] - wino_data[i]);
410-
411-
float diff = fabs(im2col_data[i] - conv2d_data[i]);
412-
// if(diff > 0.5) {
413-
printf("(%7.3f, %7.3f, %.2f, %d) \n",
414-
im2col_data[i], conv2d_data[i],
415-
diff, i);
416-
// break;
417-
// }
418-
}
412+
// for(int i = 0; i < conv2d_data.size(); i++) {
413+
// float diff = fabs(im2col_data[i] - conv2d_data[i]);
414+
// // if(diff > 0.5) {
415+
// printf("(%7.3f, %7.3f, %.2f, %d) \n",
416+
// im2col_data[i], conv2d_data[i],
417+
// diff, i);
418+
// // break;
419+
// // }
420+
// }
419421

420422
ggml_free(model.ctx);
421423
ggml_backend_buffer_free(model.buffer);

0 commit comments

Comments
 (0)