@@ -28,22 +28,22 @@ static __global__ void upscale_f32_bilinear(const float * x, float * dst,
                                             const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst,
                                             const float sf0, const float sf1, const float sf2, const float sf3,
                                             const float pixel_offset) {
-    int index = threadIdx.x + blockIdx.x * blockDim.x;
-    int dst_total_elements = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
+    const int64_t index = threadIdx.x + blockIdx.x * blockDim.x;
+    const int64_t dst_total_elements = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
 
     if (index >= dst_total_elements) {
         return;
     }
 
-    int i10_dst = index % ne10_dst;
-    int i11_dst = (index / ne10_dst) % ne11_dst;
-    int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst;
-    int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst);
+    const int i10_dst = index % ne10_dst;
+    const int i11_dst = (index / ne10_dst) % ne11_dst;
+    const int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst;
+    const int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst);
 
-    int i02_src = (int)(i12_dst / sf2);
-    int i03_src = (int)(i13_dst / sf3);
+    const int i02_src = (int)(i12_dst / sf2);
+    const int i03_src = (int)(i13_dst / sf3);
 
-    float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
+    const float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset;
     int y0_src = (int)floorf(y_src_f);
     int y1_src = y0_src + 1;
 
@@ -63,10 +63,10 @@ static __global__ void upscale_f32_bilinear(const float * x, float * dst,
     float dx = x_src_f - (float)x0_src;
     dx = max(0.0f, min(dx, 1.0f));
 
-    const float * p_a = (const float *)((const char *)x + (long)x0_src * nb00 + (long)y0_src * nb01 + (long)i02_src * nb02 + (long)i03_src * nb03);
-    const float * p_b = (const float *)((const char *)x + (long)x1_src * nb00 + (long)y0_src * nb01 + (long)i02_src * nb02 + (long)i03_src * nb03);
-    const float * p_c = (const float *)((const char *)x + (long)x0_src * nb00 + (long)y1_src * nb01 + (long)i02_src * nb02 + (long)i03_src * nb03);
-    const float * p_d = (const float *)((const char *)x + (long)x1_src * nb00 + (long)y1_src * nb01 + (long)i02_src * nb02 + (long)i03_src * nb03);
+    const float * p_a = (const float *)((const char *)x + (int64_t)x0_src * nb00 + (int64_t)y0_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03);
+    const float * p_b = (const float *)((const char *)x + (int64_t)x1_src * nb00 + (int64_t)y0_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03);
+    const float * p_c = (const float *)((const char *)x + (int64_t)x0_src * nb00 + (int64_t)y1_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03);
+    const float * p_d = (const float *)((const char *)x + (int64_t)x1_src * nb00 + (int64_t)y1_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03);
 
     const float val_a = *p_a;
     const float val_b = *p_b;
@@ -86,8 +86,8 @@ static void upscale_f32_cuda(const float * x, float * dst,
                              const int ne10, const int ne11, const int ne12, const int ne13,
                              const float sf0, const float sf1, const float sf2, const float sf3,
                              cudaStream_t stream) {
-    int dst_size = ne10 * ne11 * ne12 * ne13;
-    int num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE;
+    const int64_t dst_size = ne10 * ne11 * ne12 * ne13;
+    const int64_t num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE;
 
     upscale_f32<<<num_blocks, CUDA_UPSCALE_BLOCK_SIZE,0,stream>>>(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3);
 }
@@ -98,8 +98,8 @@ static void upscale_f32_bilinear_cuda(const float * x, float * dst,
                                       const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst,
                                       const float sf0, const float sf1, const float sf2, const float sf3,
                                       const float pixel_offset, cudaStream_t stream) {
-    int dst_size = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
-    int num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE;
+    const int64_t dst_size = ne10_dst * ne11_dst * ne12_dst * ne13_dst;
+    const int64_t num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE;
 
     upscale_f32_bilinear<<<num_blocks, CUDA_UPSCALE_BLOCK_SIZE,0,stream>>>(x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset);
 }
@@ -119,7 +119,7 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     float sf0 = (float)dst->ne[0]/src0->ne[0];
     float sf1 = (float)dst->ne[1]/src0->ne[1];
     float sf2 = (float)dst->ne[2]/src0->ne[2];
-    float sf3 = (float)dst->ne[3]/src0->ne[3];
+    const float sf3 = (float)dst->ne[3]/src0->ne[3];
 
     if (mode == GGML_SCALE_MODE_NEAREST) {
         upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream);
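
The change running through all of these hunks is the same: the flattened element index, the element count, and the block count move from 32-bit int to int64_t, so destination tensors with more than INT_MAX elements no longer overflow the index math. The standalone host-side sketch below walks through that arithmetic; it is not part of the patch, the 1024x1024x1024x4 shape is invented purely to cross the 32-bit limit, and the 256 block size merely stands in for CUDA_UPSCALE_BLOCK_SIZE.

// index_overflow_sketch.cu -- illustration only, not from the patch.
#include <climits>
#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical destination dims: 1024*1024*1024*4 = 2^32 elements,
    // which does not fit in a 32-bit signed int.
    const int ne10 = 1024, ne11 = 1024, ne12 = 1024, ne13 = 4;

    // Promote the first factor so the whole product is computed in 64 bits,
    // matching the new dst_total_elements / dst_size variables.
    const int64_t dst_total_elements = (int64_t)ne10 * ne11 * ne12 * ne13;
    printf("elements: %lld (INT_MAX = %d)\n", (long long)dst_total_elements, INT_MAX);

    // Grid sizing as in upscale_f32_cuda: the 64-bit count keeps num_blocks
    // correct past INT_MAX elements (256 stands in for CUDA_UPSCALE_BLOCK_SIZE).
    const int64_t block_size = 256;
    const int64_t num_blocks = (dst_total_elements + block_size - 1) / block_size;
    printf("blocks needed: %lld\n", (long long)num_blocks);

    // The 4D decomposition used by upscale_f32_bilinear, applied to the last
    // element; each coordinate fits an int because it is bounded by its dim,
    // but the divisions must happen on the 64-bit index.
    const int64_t index = dst_total_elements - 1;
    const int i10 =  index % ne10;
    const int i11 = (index / ne10) % ne11;
    const int i12 = (index / ((int64_t)ne10 * ne11)) % ne12;
    const int i13 =  index / ((int64_t)ne10 * ne11 * ne12);
    printf("last element -> (%d, %d, %d, %d)\n", i10, i11, i12, i13);
    return 0;
}

Built with nvcc (or any C++ compiler, since it is host-only), this prints 4294967296 elements, 16777216 blocks, and coordinates (1023, 1023, 1023, 3); with the old 32-bit index the same divisions would operate on a wrapped-around value.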