Skip to content

Commit e8963aa

Browse files
committed
cuda: Remove comments from out-prod kernel
1 parent 40a5197 commit e8963aa

File tree

2 files changed

+0
-25
lines changed

2 files changed

+0
-25
lines changed

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3202,7 +3202,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
3202 3202      }
3203 3203      } break;
3204 3204      case GGML_OP_OUT_PROD:
3205    -      // return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32;
3206 3205      return op->type == GGML_TYPE_F32;
3207 3206      case GGML_OP_GET_ROWS:
3208 3207      {

ggml/src/ggml-cuda/out-prod.cu

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,6 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
12   12    const bool src0_is_quantized = (src0->type != GGML_TYPE_F32 && src0->type != GGML_TYPE_F16);
13   13    const bool src1_is_quantized = (src1->type != GGML_TYPE_F32 && src1->type != GGML_TYPE_F16);
14   14
15    -    // if (src0_is_quantized || src1_is_quantized) {
16    -    // printf("DEBUG: OUT_PROD with quantized tensors - src0_quantized=%d, src1_quantized=%d\n",
17    -    // src0_is_quantized, src1_is_quantized);
18    -    // fflush(stdout);
19    -    // }
20    -
21    -    // GGML_ASSERT(src0->type == GGML_TYPE_F32);
22    -    // GGML_ASSERT(src1->type == GGML_TYPE_F32);
23    -
24   15    GGML_ASSERT(dst->type == GGML_TYPE_F32);
25   16
26   17    // temp buffers
// temp buffers
@@ -74,9 +65,6 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
74   65    GGML_ASSERT(ne2 == src1->ne[2]);
75   66    GGML_ASSERT(ne3 == src1->ne[3]);
76   67
77    -    // const float * src0_d = (const float *) src0->data;
78    -    // const float * src1_d = (const float *) src1->data;
79    -
80   68    // Use dequantized data
81   69    const float * src0_d = src0_f32;
82   70    const float * src1_d = src1_f32;
@@ -89,28 +77,21 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
89   77
90   78    CUBLAS_CHECK(cublasSetStream(handle, stream));
91   79
92    -    // const int64_t lda = nb01 / sizeof(float);
93   80    const int64_t lda = allocated_src0 ? ne00 : (nb01 / sizeof(float));
94   81    const int64_t ldc = nb1 / sizeof(float);
95   82
96   83    const bool src1_T = ggml_is_transposed(src1);
97   84    const cublasOperation_t src1_cublas_op = src1_T ? CUBLAS_OP_N : CUBLAS_OP_T;
98    -    // const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float);
99   85    const int64_t ldb = allocated_src1 ?
100  86    (src1_T ? ne10 : ne11) :
101  87    ((src1_T ? nb10 : nb11) / sizeof(float));
102  88
103   -    // GGML_ASSERT( (src1_T ? nb11 : nb10) == sizeof(float));
104  89    // Only assert for non dequantized src1
105  90    if (!allocated_src1) {
106  91    GGML_ASSERT((src1_T ? nb11 : nb10) == sizeof(float));
107  92    }
108  93
109  94    // data strides in dimensions 2/3
110   -    // const size_t s02 = nb02 / sizeof(float);
111   -    // const size_t s03 = nb03 / sizeof(float);
112   -    // const size_t s12 = nb12 / sizeof(float);
113   -    // const size_t s13 = nb13 / sizeof(float);
114  95    const size_t s02 = allocated_src0 ? (ne00 * ne01) : nb02 / sizeof(float);
115  96    const size_t s03 = allocated_src0 ? (ne00 * ne01 * ne02): nb03 / sizeof(float);
116  97    const size_t s12 = allocated_src1 ? (ne10 * ne11) : nb12 / sizeof(float);
@@ -136,13 +117,8 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
136  117
137  118   if (allocated_src0) {
138  119   CUDA_CHECK(cudaFreeAsync(src0_f32, stream));
139   -    // printf("DEBUG: Freed dequantized src0 buffer\n");
140  120   }
141  121   if (allocated_src1) {
142  122   CUDA_CHECK(cudaFreeAsync(src1_f32, stream));
143   -    // // printf("DEBUG: Freed dequantized src1 buffer\n");
144  123   }
145   -
146   -    // printf("DEBUG: CUDA OUT_PROD completed successfully\n");
147   -    fflush(stdout);
148  124   }

0 commit comments

Comments
 (0)