@@ -12,15 +12,6 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const bool src0_is_quantized = (src0->type != GGML_TYPE_F32 && src0->type != GGML_TYPE_F16);
     const bool src1_is_quantized = (src1->type != GGML_TYPE_F32 && src1->type != GGML_TYPE_F16);

-    // if (src0_is_quantized || src1_is_quantized) {
-    //     printf("DEBUG: OUT_PROD with quantized tensors - src0_quantized=%d, src1_quantized=%d\n",
-    //            src0_is_quantized, src1_is_quantized);
-    //     fflush(stdout);
-    // }
-
-    // GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    // GGML_ASSERT(src1->type == GGML_TYPE_F32);
-
     GGML_ASSERT(dst->type == GGML_TYPE_F32);

     // temp buffers
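Note on the "temp buffers" this hunk leads into: the `src0_f32`/`src1_f32` pointers and the `allocated_src0`/`allocated_src1` flags used further down are the dequantized F32 copies of the inputs. A minimal sketch of how such a buffer might be produced, assuming the `to_fp32_cuda_t` converters from ggml-cuda/convert.cuh (helper names and signatures vary across ggml versions; this is not the commit's verbatim code):

    // Hedged sketch: dequantize src0 into a packed temporary F32 buffer.
    float * src0_f32    = (float *) src0->data;  // already F32: use in place
    bool allocated_src0 = false;
    if (src0->type != GGML_TYPE_F32) {
        const int64_t n = ggml_nelements(src0);
        CUDA_CHECK(cudaMallocAsync((void **) &src0_f32, n * sizeof(float), stream));
        const to_fp32_cuda_t to_fp32 = ggml_get_to_fp32_cuda(src0->type);  // assumed helper
        to_fp32(src0->data, src0_f32, n, stream);  // stream-ordered conversion
        allocated_src0 = true;
    }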
@@ -74,9 +65,6 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     GGML_ASSERT(ne2 == src1->ne[2]);
     GGML_ASSERT(ne3 == src1->ne[3]);

-    // const float * src0_d = (const float *) src0->data;
-    // const float * src1_d = (const float *) src1->data;
-
     // Use dequantized data
     const float * src0_d = src0_f32;
     const float * src1_d = src1_f32;
@@ -89,28 +77,21 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

     CUBLAS_CHECK(cublasSetStream(handle, stream));

-    // const int64_t lda = nb01 / sizeof(float);
     const int64_t lda = allocated_src0 ? ne00 : (nb01 / sizeof(float));
     const int64_t ldc = nb1 / sizeof(float);

     const bool src1_T = ggml_is_transposed(src1);
     const cublasOperation_t src1_cublas_op = src1_T ? CUBLAS_OP_N : CUBLAS_OP_T;
-    // const int64_t ldb = (src1_T ? nb10 : nb11) / sizeof(float);
     const int64_t ldb = allocated_src1 ?
         (src1_T ? ne10 : ne11) :
         ((src1_T ? nb10 : nb11) / sizeof(float));

-    // GGML_ASSERT( (src1_T ? nb11 : nb10) == sizeof(float));
     // Only assert for non dequantized src1
     if (!allocated_src1) {
         GGML_ASSERT((src1_T ? nb11 : nb10) == sizeof(float));
     }

     // data strides in dimensions 2/3
-    // const size_t s02 = nb02 / sizeof(float);
-    // const size_t s03 = nb03 / sizeof(float);
-    // const size_t s12 = nb12 / sizeof(float);
-    // const size_t s13 = nb13 / sizeof(float);
     const size_t s02 = allocated_src0 ? (ne00 * ne01)        : nb02 / sizeof(float);
     const size_t s03 = allocated_src0 ? (ne00 * ne01 * ne02) : nb03 / sizeof(float);
     const size_t s12 = allocated_src1 ? (ne10 * ne11)        : nb12 / sizeof(float);
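The distinction this hunk encodes: a freshly dequantized buffer is packed, so its leading dimension is `ne00` and its 2/3-dimension strides are `ne00*ne01` and `ne00*ne01*ne02` elements, whereas an original F32 tensor may be a strided view, where the byte strides `nb*` are authoritative. For orientation, the unchanged consumer of these values sits between this hunk and the next; in upstream ggml it is a per-slice SGEMM loop roughly like the sketch below (simplified: `dst_d`, `s2`/`s3`, `alpha`/`beta`, and the src0 broadcast handling come from the surrounding file, not from this diff):

    for (int64_t i3 = 0; i3 < ne3; ++i3) {
        for (int64_t i2 = 0; i2 < ne2; ++i2) {
            // dst slice (i2, i3) = src0 slice * src1 slice^T, one GEMM per 2/3-slice
            CUBLAS_CHECK(cublasSgemm(handle, CUBLAS_OP_N, src1_cublas_op,
                    ne0, ne1, ne01,
                    &alpha, src0_d + i3*s03 + i2*s02, lda,
                            src1_d + i3*s13 + i2*s12, ldb,
                    &beta,  dst_d  + i3*s3  + i2*s2,  ldc));
        }
    }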
@@ -136,13 +117,8 @@ void ggml_cuda_out_prod(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

     if (allocated_src0) {
         CUDA_CHECK(cudaFreeAsync(src0_f32, stream));
-        // printf("DEBUG: Freed dequantized src0 buffer\n");
     }
     if (allocated_src1) {
         CUDA_CHECK(cudaFreeAsync(src1_f32, stream));
-        // // printf("DEBUG: Freed dequantized src1 buffer\n");
     }
-
-    // printf("DEBUG: CUDA OUT_PROD completed successfully\n");
-    fflush(stdout);
 }
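A note on the cleanup hunk: `cudaFreeAsync` enqueues the free on the same `stream` as the dequantize kernels and the GEMMs, so the temporary buffers are released only after that prior work completes; no extra synchronization is needed. A minimal sketch of the stream-ordered allocation pattern assumed here (CUDA 11.2+; `n_bytes` is a placeholder):

    float * tmp = nullptr;
    CUDA_CHECK(cudaMallocAsync((void **) &tmp, n_bytes, stream));  // alloc ordered on stream
    // ... kernels that read/write tmp, enqueued on the same stream ...
    CUDA_CHECK(cudaFreeAsync(tmp, stream));  // freed only after prior work on stream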