[CI/Build] Suppress divide-by-zero and missing return statement warnings (#7001)

tlrmchlsmth · web-flow · commit 6e4852ce28ad · 2024-08-05T16:00:01.000-04:00
diff --git a/csrc/attention/dtype_bfloat16.cuh b/csrc/attention/dtype_bfloat16.cuh
@@ -94,6 +94,7 @@ inline __device__ float2 bf1622float2(const __nv_bfloat162 val) {
 #else
   return __bfloat1622float2(val);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 inline __device__ __nv_bfloat162 bf162bf162(const __nv_bfloat16 val) {
@@ -102,6 +103,7 @@ inline __device__ __nv_bfloat162 bf162bf162(const __nv_bfloat16 val) {
 #else
   return __bfloat162bfloat162(val);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 // Vector addition.
@@ -115,6 +117,7 @@ inline __device__ __nv_bfloat16 add(__nv_bfloat16 a, __nv_bfloat16 b) {
   return __hadd(a, b);
   #endif
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 inline __device__ __nv_bfloat162 add(__nv_bfloat162 a, __nv_bfloat162 b) {
@@ -123,6 +126,7 @@ inline __device__ __nv_bfloat162 add(__nv_bfloat162 a, __nv_bfloat162 b) {
 #else
   return __hadd2(a, b);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 inline __device__ bf16_4_t add(bf16_4_t a, bf16_4_t b) {
@@ -170,6 +174,7 @@ inline __device__ __nv_bfloat16 mul(__nv_bfloat16 a, __nv_bfloat16 b) {
 #else
   return __hmul(a, b);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 template <>
@@ -179,6 +184,7 @@ inline __device__ __nv_bfloat162 mul(__nv_bfloat162 a, __nv_bfloat162 b) {
 #else
   return __hmul2(a, b);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 template <>
@@ -289,6 +295,7 @@ inline __device__ __nv_bfloat162 fma(__nv_bfloat162 a, __nv_bfloat162 b,
 #else
   return __hfma2(a, b, c);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 inline __device__ __nv_bfloat162 fma(__nv_bfloat16 a, __nv_bfloat162 b,
@@ -298,6 +305,7 @@ inline __device__ __nv_bfloat162 fma(__nv_bfloat16 a, __nv_bfloat162 b,
 #else
   return __hfma2(bf162bf162(a), b, c);
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 inline __device__ bf16_4_t fma(bf16_4_t a, bf16_4_t b, bf16_4_t c) {
diff --git a/csrc/quantization/awq/dequantize.cuh b/csrc/quantization/awq/dequantize.cuh
@@ -95,6 +95,7 @@ __device__ uint4 dequantize_s4_to_fp16x2(uint32_t const& source) {
 
   return result;
 #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 }  // namespace awq
diff --git a/csrc/quantization/fp8/nvidia/quant_utils.cuh b/csrc/quantization/fp8/nvidia/quant_utils.cuh
@@ -475,6 +475,7 @@ __inline__ __device__ uint8_t scaled_vec_conversion<uint8_t, __nv_bfloat16>(
                                                  __NV_SATFINITE, fp8_type);
   return (uint8_t)res;
     #endif
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 // float -> fp8
@@ -508,7 +509,7 @@ __inline__ __device__ Tout convert(const Tin& x) {
   }
   #endif
   assert(false);
-  return {};  // Squash missing return statement warning
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
 template <typename Tout, typename Tin, Fp8KVCacheDataType kv_dt>
@@ -521,7 +522,7 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
   }
   #endif
   assert(false);
-  return {};  // Squash missing return statement warning
+  __builtin_unreachable();  // Suppress missing return statement warning
 }
 
   // The following macro is used to dispatch the conversion function based on
diff --git a/csrc/quantization/gptq_marlin/gptq_marlin.cu b/csrc/quantization/gptq_marlin/gptq_marlin.cu
@@ -1130,12 +1130,12 @@ __global__ void Marlin(
   };
 
   auto fetch_zp_to_registers = [&](int k, int full_pipe) {
-    if constexpr (has_zp) {
-      // This code does not handle group_blocks == 0,
-      // which signifies act_order.
-      // has_zp implies AWQ, which doesn't have act_order,
-      static_assert(group_blocks != 0);
+    // This code does not handle group_blocks == 0,
+    // which signifies act_order.
+    // has_zp implies AWQ, which doesn't have act_order.
+    static_assert(!has_zp || group_blocks != 0);
 
+    if constexpr (has_zp) {
       int pipe = full_pipe % stages;
 
       if constexpr (group_blocks == -1) {
@@ -1161,7 +1161,13 @@ __global__ void Marlin(
         cur_k += k_iter_size * (k % b_sh_wr_iters);
 
         int k_blocks = cur_k / 16;
-        int cur_group_id = k_blocks / group_blocks;
+        int cur_group_id = 0;
+
+        // Suppress bogus and persistent divide-by-zero warning
+  #pragma nv_diagnostic push
+  #pragma nv_diag_suppress divide_by_zero
+        cur_group_id = k_blocks / group_blocks;
+  #pragma nv_diagnostic pop
 
         int4* sh_zp_stage = sh_zp + zp_sh_stage * pipe;
 

Original file line number	Diff line number	Diff line change
`@@ -95,6 +95,7 @@ __device__ uint4 dequantize_s4_to_fp16x2(uint32_t const& source) {`
`95`	`95`
`96`	`96`	`return result;`
`97`	`97`	`#endif`
	`98`	`+ __builtin_unreachable(); // Suppress missing return statement warning`
`98`	`99`	`}`
`99`	`100`
`100`	`101`	`} // namespace awq`
Original file line number	Diff line number	Diff line change
`@@ -475,6 +475,7 @@ __inline__ __device__ uint8_t scaled_vec_conversion<uint8_t, __nv_bfloat16>(`
`475`	`475`	`__NV_SATFINITE, fp8_type);`
`476`	`476`	`return (uint8_t)res;`
`477`	`477`	`#endif`
	`478`	`+ __builtin_unreachable(); // Suppress missing return statement warning`
`478`	`479`	`}`
`479`	`480`
`480`	`481`	`// float -> fp8`
`@@ -508,7 +509,7 @@ __inline__ __device__ Tout convert(const Tin& x) {`
`508`	`509`	`}`
`509`	`510`	`#endif`
`510`	`511`	`assert(false);`
`511`		`- return {}; // Squash missing return statement warning`
	`512`	`+ __builtin_unreachable(); // Suppress missing return statement warning`
`512`	`513`	`}`
`513`	`514`
`514`	`515`	`template <typename Tout, typename Tin, Fp8KVCacheDataType kv_dt>`
`@@ -521,7 +522,7 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {`
`521`	`522`	`}`
`522`	`523`	`#endif`
`523`	`524`	`assert(false);`
`524`		`- return {}; // Squash missing return statement warning`
	`525`	`+ __builtin_unreachable(); // Suppress missing return statement warning`
`525`	`526`	`}`
`526`	`527`
`527`	`528`	`// The following macro is used to dispatch the conversion function based on`