[libclc] Refine __clc_fp*_subnormals_supported and __clc_flush_denormal_if_not_supported

wenju-he · arsenm · wenju-he · commit 7e2d210d9c6c · 2025-09-09T11:11:28.000+02:00
Remove the dependency on the libclc build-time configuration for __clc_fp*_subnormals_supported. The check is now implemented with LLVM intrinsics so it can be resolved during target lowering or at runtime. Improve the __clc_flush_denormal_if_not_supported implementation as well: it no longer calls __clc_fp*_subnormals_supported, which canonicalizes (and therefore quiets) sNaN, so the new implementation is more foldable. Remove the CMake option ENABLE_RUNTIME_SUBNORMAL and related code. Resolves #153148 Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
@@ -41,8 +41,6 @@ set( LIBCLC_MIN_LLVM 3.9.0 )
 set( LIBCLC_TARGETS_TO_BUILD "all"
     CACHE STRING "Semicolon-separated list of libclc targets to build, or 'all'." )
 
-option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF )
-
 option(
   LIBCLC_USE_SPIRV_BACKEND "Build SPIR-V targets with the SPIR-V backend." OFF
 )
@@ -231,19 +229,6 @@ set( tahiti_aliases pitcairn verde oland hainan bonaire kabini kaveri hawaii
 configure_file( libclc.pc.in libclc.pc @ONLY )
 install( FILES ${CMAKE_CURRENT_BINARY_DIR}/libclc.pc DESTINATION "${CMAKE_INSTALL_DATADIR}/pkgconfig" )
 
-if( ENABLE_RUNTIME_SUBNORMAL )
-  foreach( file IN ITEMS subnormal_use_default subnormal_disable )
-    link_bc(
-       TARGET ${file}
-       INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/opencl/lib/generic/${file}.ll
-    )
-    install(
-      FILES $<TARGET_PROPERTY:${file},TARGET_FILE>
-      DESTINATION "${CMAKE_INSTALL_DATADIR}/clc"
-    )
-  endforeach()
-endif()
-
 find_package( Python3 REQUIRED COMPONENTS Interpreter )
 file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/utils/gen_convert.py script_loc )
 add_custom_command(
@@ -371,9 +356,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
       list( APPEND opencl_gen_files clspv-convert.cl )
     else()
       list( APPEND opencl_gen_files convert.cl )
-      if ( NOT ENABLE_RUNTIME_SUBNORMAL )
-        list( APPEND opencl_lib_files opencl/lib/generic/subnormal_use_default.ll )
-      endif()
     endif()
   endif()
 
diff --git a/libclc/clc/include/clc/math/clc_subnormal_config.h b/libclc/clc/include/clc/math/clc_subnormal_config.h
@@ -10,7 +10,6 @@
 
 #include <clc/clcfunc.h>
 
-_CLC_DECL bool __clc_subnormals_disabled();
 _CLC_DECL bool __clc_fp16_subnormals_supported();
 _CLC_DECL bool __clc_fp32_subnormals_supported();
 _CLC_DECL bool __clc_fp64_subnormals_supported();
diff --git a/libclc/clc/include/clc/math/math.h b/libclc/clc/include/clc/math/math.h
@@ -11,7 +11,6 @@
 
 #include <clc/clc_as_type.h>
 #include <clc/clcfunc.h>
-#include <clc/math/clc_subnormal_config.h>
 
 #define SNAN 0x001
 #define QNAN 0x002
@@ -66,13 +65,11 @@ bool __attribute__((noinline)) __clc_runtime_has_hw_fma32(void);
 #define LOG_MAGIC_NUM_SP32 (1 + NUMEXPBITS_SP32 - EXPBIAS_SP32)
 
 _CLC_OVERLOAD _CLC_INLINE float __clc_flush_denormal_if_not_supported(float x) {
-  int ix = __clc_as_int(x);
-  if (!__clc_fp32_subnormals_supported() && ((ix & EXPBITS_SP32) == 0) &&
-      ((ix & MANTBITS_SP32) != 0)) {
-    ix &= SIGNBIT_SP32;
-    x = __clc_as_float(ix);
-  }
-  return x;
+  // Do not test __clc_fp32_subnormals_supported() here: it is built on
+  // llvm.canonicalize, which quiets sNaN; this compare/select form avoids it.
+  return __builtin_fabsf(x) < 0x1p-149f
+             ? __builtin_elementwise_copysign(0.0f, x)
+             : x;
 }
 
 #ifdef cl_khr_fp64
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
@@ -137,6 +137,7 @@ math/clc_sincos_helpers.cl
 math/clc_sinh.cl
 math/clc_sinpi.cl
 math/clc_sqrt.cl
+math/clc_subnormal_config.cl
 math/clc_sw_fma.cl
 math/clc_tables.cl
 math/clc_tan.cl
diff --git a/libclc/clc/lib/generic/math/clc_exp10.cl b/libclc/clc/lib/generic/math/clc_exp10.cl
@@ -11,7 +11,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_isnan.h>
diff --git a/libclc/clc/lib/generic/math/clc_hypot.cl b/libclc/clc/lib/generic/math/clc_hypot.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_sqrt.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/relational/clc_isnan.h>
 #include <clc/shared/clc_clamp.h>
diff --git a/libclc/clc/lib/generic/math/clc_pow.cl b/libclc/clc/lib/generic/math/clc_pow.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_select.h>
diff --git a/libclc/clc/lib/generic/math/clc_pown.cl b/libclc/clc/lib/generic/math/clc_pown.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_select.h>
diff --git a/libclc/clc/lib/generic/math/clc_powr.cl b/libclc/clc/lib/generic/math/clc_powr.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
 #include <clc/math/clc_mad.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_select.h>
diff --git a/libclc/clc/lib/generic/math/clc_remquo.cl b/libclc/clc/lib/generic/math/clc_remquo.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_floor.h>
 #include <clc/math/clc_fma.h>
 #include <clc/math/clc_ldexp.h>
-#include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
 #include <clc/shared/clc_max.h>
diff --git a/libclc/clc/lib/generic/math/clc_subnormal_config.cl b/libclc/clc/lib/generic/math/clc_subnormal_config.cl
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+#include <clc/math/clc_subnormal_config.h>
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEF bool __clc_fp16_subnormals_supported() {
+#ifdef CLC_SPIRV
+  // SPIR-V doesn't support llvm.canonicalize for now.
+  return false;
+#else
+  return !__builtin_isfpclass(__builtin_canonicalizef16(0x1p-24h),
+                              __FPCLASS_POSZERO); // 2^-24: min fp16 subnormal
+#endif
+}
+#endif // cl_khr_fp16
+
+_CLC_DEF bool __clc_fp32_subnormals_supported() {
+#ifdef CLC_SPIRV
+  // SPIR-V doesn't support llvm.canonicalize for now.
+  return false;
+#else
+  return !__builtin_isfpclass(__builtin_canonicalizef(0x1p-149f),
+                              __FPCLASS_POSZERO); // 2^-149: min fp32 subnormal
+#endif
+}
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DEF bool __clc_fp64_subnormals_supported() {
+#ifdef CLC_SPIRV
+  // SPIR-V doesn't support llvm.canonicalize for now.
+  return false;
+#else
+  return !__builtin_isfpclass(__builtin_canonicalize(0x1p-1074),
+                              __FPCLASS_POSZERO); // 2^-1074: min fp64 subnormal
+#endif
+}
+#endif // cl_khr_fp64
diff --git a/libclc/opencl/lib/clspv/SOURCES b/libclc/opencl/lib/clspv/SOURCES
@@ -1,6 +1,5 @@
 math/fma.cl
 shared/vstore_half.cl
-subnormal_config.cl
 ../generic/geometric/distance.cl
 ../generic/geometric/length.cl
 ../generic/math/acos.cl
diff --git a/libclc/opencl/lib/clspv/subnormal_config.cl b/libclc/opencl/lib/clspv/subnormal_config.cl
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
@@ -1,5 +1,3 @@
-subnormal_config.cl
-subnormal_helper_func.ll
 async/async_work_group_copy.cl
 async/async_work_group_strided_copy.cl
 async/prefetch.cl
diff --git a/libclc/opencl/lib/generic/subnormal_config.cl b/libclc/opencl/lib/generic/subnormal_config.cl
diff --git a/libclc/opencl/lib/generic/subnormal_disable.ll b/libclc/opencl/lib/generic/subnormal_disable.ll
diff --git a/libclc/opencl/lib/generic/subnormal_helper_func.ll b/libclc/opencl/lib/generic/subnormal_helper_func.ll
diff --git a/libclc/opencl/lib/generic/subnormal_use_default.ll b/libclc/opencl/lib/generic/subnormal_use_default.ll
diff --git a/libclc/opencl/lib/spirv/SOURCES b/libclc/opencl/lib/spirv/SOURCES
@@ -1,4 +1,3 @@
-subnormal_config.cl
 ../generic/async/async_work_group_strided_copy.cl
 ../generic/async/wait_group_events.cl
 ../generic/common/degrees.cl
diff --git a/libclc/opencl/lib/spirv/subnormal_config.cl b/libclc/opencl/lib/spirv/subnormal_config.cl