Skip to content

Commit 610e41a

Browse files
committed
still debugging
1 parent c45df12 commit 610e41a

File tree

2 files changed

+21
-9
lines changed

2 files changed

+21
-9
lines changed

ggml/src/ggml-cuda/conv2d-implicit.cu

Lines changed: 19 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1130,14 +1130,26 @@ static __global__ void conv2d_implicit_kernel(const half * __restrict__ input,
1130 1130
);
1131 1131
}
1132 1132
}
1133+
// if(threadIdx.x == 0 && threadIdx.y ==0 && blockIdx.x ==0 && blockIdx.y ==0){
1134+
// printf(" %d, %d: %f, %f, %f, %f \n", block_k, mma_k, __half2float(acc_register_[3][0][0]), __half2float(acc_register_[3][0][1]),
1135+
// __half2float(acc_register_[3][0][2]), __half2float(acc_register_[3][0][3]));
1136+
// printf(" %d, %d: %f, %f, %f, %f \n", block_k, mma_k, __half2float(A_register_[3][mma_k][0]), __half2float(A_register_[3][mma_k][1]),
1137+
// __half2float(A_register_[3][mma_k][2]), __half2float(A_register_[3][mma_k][3]));
1138+
// printf(" %d, %d: %f, %f, %f, %f \n", block_k, mma_k, __half2float(B_register_[mma_k][0][0]), __half2float(B_register_[mma_k][0][1]),
1139+
// __half2float(B_register_[mma_k][0][2]), __half2float(B_register_[mma_k][0][3]));
1140+
// }
1141+
// if(threadIdx.x < 4 && threadIdx.y ==0 && blockIdx.x ==0 && blockIdx.y ==0){
1142+
// printf("A %d, %d, %d: %f, %f \n", block_k, mma_k, threadIdx.x, __half2float(A_register_[3][mma_k][0]), __half2float(A_register_[3][mma_k][1]));
1143+
// printf("B %d, %d, %d: %f, %f \n", block_k, mma_k, threadIdx.x, __half2float(B_register_[mma_k][0][0]), __half2float(B_register_[mma_k][0][1]));
1144+
// }
1133 1145
}
1134-
// if(threadIdx.x == 4 && threadIdx.y ==0 && blockIdx.x ==0 && blockIdx.y ==0){
1135-
// printf(" %d: %f, %f, %f, %f \n", block_k, __half2float(acc_register_[0][0][0]), __half2float(acc_register_[0][0][1]),
1136-
// __half2float(acc_register_[0][0][2]), __half2float(acc_register_[0][0][3]));
1137-
// printf(" %d: %f, %f, %f, %f \n", block_k, __half2float(A_register_[0][0][0]), __half2float(A_register_[0][0][1]),
1138-
// __half2float(A_register_[0][0][2]), __half2float(A_register_[0][0][3]));
1139-
// printf(" %d: %f, %f, %f, %f \n", block_k, __half2float(B_register_[0][0][0]), __half2float(B_register_[0][0][1]),
1140-
// __half2float(B_register_[0][0][2]), __half2float(B_register_[0][0][3]));
1146+
// if(threadIdx.x == 0 && threadIdx.y ==0 && blockIdx.x ==0 && blockIdx.y ==0){
1147+
// printf(" %d: %f, %f, %f, %f \n", block_k, __half2float(acc_register_[3][0][0]), __half2float(acc_register_[3][0][1]),
1148+
// __half2float(acc_register_[3][0][2]), __half2float(acc_register_[3][0][3]));
1149+
// printf(" %d: %f, %f, %f, %f \n", block_k, __half2float(A_register_[3][0][0]), __half2float(A_register_[3][0][1]),
1150+
// __half2float(A_register_[3][0][2]), __half2float(A_register_[3][0][3]));
1151+
// printf(" %d: %f, %f, %f, %f \n", block_k, __half2float(B_register_[3][0][0]), __half2float(B_register_[3][0][1]),
1152+
// __half2float(B_register_[3][0][2]), __half2float(B_register_[3][0][3]));
1141 1153
// }
1142 1154

1143 1155

tests/test-conv2d-implicit.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ void load_model(test_model & model, int ic, int oc, int iw, int ih, bool use_gpu
48 48
// Initialize adata
49 49
std::vector<float> adata(KW * KH * IC * OC);
50 50
for (int i = 0; i < KW * KH * IC * OC; i++) {
51-
adata[i] = 2.f;
51+
adata[i] = 0.2f;
52 52
}
53 53

54 54
// Convert adata to fp16 format
@@ -344,7 +344,7 @@ int main(void)
344 344
// std::make_tuple(640,640,52,76),
345 345
// std::make_tuple(640,640,104,152),
346 346
// std::make_tuple(960,320,104,152),
347-
std::make_tuple(640,128,26,38),
347+
std::make_tuple(128,128,26,38),
348 348
// std::make_tuple(1280,640,52,76),
349 349
// std::make_tuple(1920,1280,26,38),
350 350
// std::make_tuple(2560,1280,26,38),

0 commit comments

Comments
 (0)