[clang][NVPTX] Add intrinsics and builtins for CVT RS rounding mode #160494

Wolfram70 · 2025-09-24T10:45:32Z

This change adds LLVM intrinsics and clang builtins for the cvt
RS rounding mode instruction variants.

Tests are added in convert-sm103a.ll and verified through ptxas-13.0.

llvmbot · 2025-09-24T10:46:01Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-backend-nvptx

@llvm/pr-subscribers-clang

Author: Srinivasa Ravi (Wolfram70)

Changes

This change adds LLVM intrinsics and clang builtins for the cvt
RS rounding mode instruction variants.

Tests are added in convert-sm103a.ll and verified through ptxas-13.0.

Patch is 44.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160494.diff

10 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsNVPTX.td (+21)
(modified) clang/test/CodeGen/builtins-nvptx.c (+83)
(modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+37)
(modified) llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp (+3)
(modified) llvm/lib/Target/NVPTX/NVPTX.h (+1)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+82-2)
(modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (+5)
(modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+44)
(modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+75)
(added) llvm/test/CodeGen/NVPTX/convert-sm103a.ll (+297)

diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 2d6fa1771014d..819262d87a917 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -579,11 +579,19 @@ def __nvvm_ff2bf16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)
 def __nvvm_ff2bf16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2bf16x2_rs : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2bf16x2_rs_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2bf16x2_rs_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2bf16x2_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
 
 def __nvvm_ff2f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2f16x2_rs : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2f16x2_rs_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2f16x2_rs_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_ff2f16x2_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
 
 def __nvvm_f2bf16_rn : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rn_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
@@ -616,6 +624,11 @@ def __nvvm_e4m3x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(sh
 def __nvvm_e5m2x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM_89, PTX81>;
 def __nvvm_e5m2x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM_89, PTX81>;
 
+def __nvvm_f32x4_to_e4m3x4_rs_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e4m3x4_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e5m2x4_rs_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e5m2x4_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+
 def __nvvm_ff_to_e2m3x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_e2m3x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_e3m2x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
@@ -626,12 +639,20 @@ def __nvvm_e2m3x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(sh
 def __nvvm_e3m2x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_e3m2x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
+def __nvvm_f32x4_to_e2m3x4_rs_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e2m3x4_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e3m2x4_rs_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e3m2x4_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<4, char>(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+
 def __nvvm_ff_to_e2m1x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_e2m1x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
 def __nvvm_e2m1x2_to_f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_e2m1x2_to_f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(short)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 
+def __nvvm_f32x4_to_e2m1x4_rs_satfinite : NVPTXBuiltinSMAndPTX<"short(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+def __nvvm_f32x4_to_e2m1x4_rs_relu_satfinite : NVPTXBuiltinSMAndPTX<"short(_Vector<4, float>, uint32_t)", SM<"100a", [SM_103a]>, PTX87>;
+
 def __nvvm_ff_to_ue8m0x2_rz : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_ue8m0x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
 def __nvvm_ff_to_ue8m0x2_rp : NVPTXBuiltinSMAndPTX<"short(float, float)", SM<"100a", [SM_101a, SM_120a]>, PTX86>;
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index f994adb14e457..0cf116ea5c5b4 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -43,6 +43,12 @@
 // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_120a -target-feature +ptx86 -DPTX=86 \
 // RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
 // RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX86_SM120a %s
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_103a -target-feature +ptx87 -DPTX=87 \
+// RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX87_SM103a %s
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_100a -target-feature +ptx87 -DPTX=87 \
+// RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX87_SM100a %s
 // ###  The last run to check with the highest SM and PTX version available
 // ###  to make sure target builtins are still accepted.
 // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_120a -target-feature +ptx87 -DPTX=87 \
@@ -1203,6 +1209,83 @@ __device__ void nvvm_cvt_sm100a_sm101a_sm120a() {
   // CHECK: ret void
 }
 
+__device__ void nvvm_cvt_sm100a_sm103a() {
+#if (PTX >= 87) && (__CUDA_ARCH_FEAT_SM100_ALL || __CUDA_ARCH_FEAT_SM103_ALL)
+  
+// CHECK_PTX87_SM100a: call <2 x half> @llvm.nvvm.ff2f16x2.rs(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x half> @llvm.nvvm.ff2f16x2.rs(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2f16x2_rs(1.0f, 1.0f, 0);
+  
+// CHECK_PTX87_SM100a: call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2f16x2_rs_relu(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <2 x half> @llvm.nvvm.ff2f16x2.rs.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x half> @llvm.nvvm.ff2f16x2.rs.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2f16x2_rs_satfinite(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x half> @llvm.nvvm.ff2f16x2.rs.relu.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2f16x2_rs_relu_satfinite(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2bf16x2_rs(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2bf16x2_rs_relu(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2bf16x2_rs_satfinite(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+// CHECK_PTX87_SM103a: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rs.relu.satfinite(float 1.000000e+00, float 1.000000e+00, i32 0)
+  __nvvm_ff2bf16x2_rs_relu_satfinite(1.0f, 1.0f, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e4m3x4_rs_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e4m3x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e4m3x4_rs_relu_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e5m2x4_rs_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e5m2x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e5m2x4_rs_relu_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e2m3x4_rs_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);  
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e2m3x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e2m3x4_rs_relu_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e3m2x4_rs_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call <4 x i8> @llvm.nvvm.f32x4.to.e3m2x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e3m2x4_rs_relu_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e2m1x4_rs_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+
+// CHECK_PTX87_SM100a: call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+// CHECK_PTX87_SM103a: call i16 @llvm.nvvm.f32x4.to.e2m1x4.rs.relu.satfinite(<4 x float> splat (float 1.000000e+00), i32 0)
+  __nvvm_f32x4_to_e2m1x4_rs_relu_satfinite({1.0f, 1.0f, 1.0f, 1.0f}, 0);
+#endif
+}
+
 #define NAN32 0x7FBFFFFF
 #define NAN16 (__bf16)0x7FBF
 #define BF16 (__bf16)0.1f
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 7b40841e45d0d..78aedb99487cd 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1421,6 +1421,18 @@ let TargetPrefix = "nvvm" in {
     }
   }
 
+  // RS rounding mode (Stochastic Rounding) conversions for f16x2, bf16x2 types
+  // The last i32 operand provides the random bits for the conversion
+  foreach relu = ["", "_relu"] in {
+    foreach satfinite = ["", "_satfinite"] in {
+      def int_nvvm_ff2f16x2_rs # relu # satfinite : NVVMBuiltin,
+          PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>;
+
+      def int_nvvm_ff2bf16x2_rs # relu # satfinite : NVVMBuiltin,
+          PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty, llvm_i32_ty]>;
+    }
+  }
+
   foreach satfinite = ["", "_satfinite"] in {
     def int_nvvm_f2tf32_rna # satfinite : NVVMBuiltin,
         PureIntrinsic<[llvm_i32_ty], [llvm_float_ty]>;
@@ -1443,6 +1455,15 @@ let TargetPrefix = "nvvm" in {
           PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
     }
   }
+  
+  // RS rounding mode (Stochastic Rounding) conversions for f8x4 types
+  // The last i32 operand provides the random bits for the conversion
+  foreach type = ["e4m3x4", "e5m2x4"] in {
+    foreach relu = ["", "_relu"] in {
+      def int_nvvm_f32x4_to_ # type # _rs # relu # _satfinite : NVVMBuiltin,
+          PureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
+    }
+  }
 
   // FP4 conversions.
   foreach relu = ["", "_relu"] in {
@@ -1452,6 +1473,13 @@ let TargetPrefix = "nvvm" in {
     def int_nvvm_e2m1x2_to_f16x2_rn # relu : NVVMBuiltin,
         PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
   }
+  
+  // RS rounding mode (Stochastic Rounding) conversions for f4x4 type
+  // The last i32 operand provides the random bits for the conversion
+  foreach relu = ["", "_relu"] in {
+    def int_nvvm_f32x4_to_e2m1x4_rs # relu # _satfinite : NVVMBuiltin,
+        PureIntrinsic<[llvm_i16_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
+  }
 
   // FP6 conversions.
   foreach type = ["e2m3x2", "e3m2x2"] in {
@@ -1463,6 +1491,15 @@ let TargetPrefix = "nvvm" in {
           PureIntrinsic<[llvm_v2f16_ty], [llvm_i16_ty]>;
     }
   }
+  
+  // RS rounding mode (Stochastic Rounding) conversions for f6x4 types
+  // The last i32 operand provides the random bits for the conversion
+  foreach type = ["e2m3x4", "e3m2x4"] in {
+    foreach relu = ["", "_relu"] in {
+      def int_nvvm_f32x4_to_ # type # _rs # relu # _satfinite : NVVMBuiltin,
+          PureIntrinsic<[llvm_v4i8_ty], [llvm_v4f32_ty, llvm_i32_ty]>;
+    }
+  }
 
   // UE8M0x2 conversions.
   foreach rmode = ["_rz", "_rp"] in {
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index f9bdc09935330..77913f27838e2 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -149,6 +149,9 @@ void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
     case NVPTX::PTXCvtMode::RNA:
       O << ".rna";
       return;
+    case NVPTX::PTXCvtMode::RS:
+      O << ".rs";
+      return;
     }
   }
   llvm_unreachable("Invalid conversion modifier");
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 77a0e03d4075a..1e0f747f8f7fc 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -207,6 +207,7 @@ enum CvtMode {
   RM,
   RP,
   RNA,
+  RS,
 
   BASE_MASK = 0x0F,
   FTZ_FLAG = 0x10,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ca8a3f69f991d..05ada362ab946 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1077,9 +1077,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // Enable custom lowering for the following:
   //   * MVT::i128 - clusterlaunchcontrol
   //   * MVT::i32 - prmt
+  //   * MVT::v4f32 - cvt_rs fp{4/6/8}x4 intrinsics
   //   * MVT::Other - internal.addrspace.wrap
-  setOperationAction(ISD::INTRINSIC_WO_CHAIN, {MVT::i32, MVT::i128, MVT::Other},
-                     Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN,
+                     {MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
 }
 
 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -1134,6 +1135,11 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
     MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
     MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
+    MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
+    MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
+    MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
+    MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
+    MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
   }
   return nullptr;
 
@@ -2693,6 +2699,69 @@ static SDValue LowerClusterLaunchControlQueryCancel(SDValue Op,
                      {TryCancelResponse0, TryCancelResponse1});
 }
 
+bool isCvtRSReluIntrinsic(Intrinsic::ID ID) {
+  switch (ID) {
+  case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
+  case Intrinsic::nvvm_f32x4_to_e5m2x4_rs_relu_satfinite:
+  case Intrinsic::nvvm_f32x4_to_e2m3x4_rs_relu_satfinite:
+  case Intrinsic::nvvm_f32x4_to_e3m2x4_rs_relu_satfinite:
+  case Intrinsic::nvvm_f32x4_to_e2m1x4_rs_relu_satfinite:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
+  SDNode *N = Op.getNode();
+  SDLoc DL(N);
+  SDValue F32Vec = N->getOperand(1);
+  SDValue RBits = N->getOperand(2);
+
+  unsigned IntrinsicID = N->getConstantOperandVal(0);
+
+  uint32_t CvtModeFlag = NVPTX::PTXCvtMode::CvtMode::RS;
+  if (isCvtRSReluIntrinsic(IntrinsicID))
+    CvtModeFlag |= NVPTX::PTXCvtMode::CvtMode::RELU_FLAG;
+
+  SDValue Float1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+                               DAG.getIntPtrConstant(0, DL));
+  SDValue Float2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+                               DAG.getIntPtrConstant(1, DL));
+  SDValue Float3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+                               DAG.getIntPtrConstant(2, DL));
+  SDValue Float4 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, F32Vec,
+                               DAG.getIntPtrConstant(3, DL));
+
+  auto OpSignature =
+      [&]() -> std::pair<NVPTXISD::NodeType, MVT::SimpleValueType> {
+    switch (IntrinsicID) {
+    case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
+    case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_satfinite:
+      return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8};
+    case Intrinsic::nvvm_f32x4_to_e5m2x4_rs_relu_satfinite:
+    case Intrinsic::nvvm_f32x4_to_e5m2x4_rs_satfinite:
+      return {NVPTXISD::CVT_E5M2X4_F32X4_RS_SF, MVT::v4i8};
+    case Intrinsic::nvvm_f32x4_to_e2m3x4_rs_relu_satfinite:
+    case Intrinsic::nvvm_f32x4_to_e2m3x4_rs_satfinite:
+      return {NVPTXISD::CVT_E2M3X4_F32X4_RS_SF, MVT::v4i8};
+    case Intrinsic::nvvm_f32x4_to_e3m2x4_rs_relu_satfinite:
+    case Intrinsic::nvvm_f32x4_to_e3m2x4_rs_satfinite:
+      return {NVPTXISD::CVT_E3M2X4_F32X4_RS_SF, MVT::v4i8};
+    case Intrinsic::nvvm_f32x4_to_e2m1x4_rs_relu_satfinite:
+    case Intrinsic::nvvm_f32x4_to_e2m1x4_rs_satfin...
[truncated]

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

This change adds LLVM intrinsics and clang builtins for the `cvt` RS rounding mode instruction variants. Tests are added in `convert-sm103a.ll` and verified through ptxas-13.0.

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

durga4github · 2025-09-25T10:48:42Z

LGTM, with one optional nit.

@Artem-B , Please help us with a review on the Clang side of things.

Artem-B

LGTM, modulo few style and test nits.

clang/include/clang/Basic/BuiltinsNVPTX.td

clang/test/CodeGen/builtins-nvptx.c

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Artem-B

LGTM

llvm-ci · 2025-10-06T04:00:33Z

LLVM Buildbot has detected a new failure on builder clang-with-thin-lto-ubuntu running on as-worker-92 while building clang,llvm at step 7 "test-stage1-compiler".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/127/builds/4906

Here is the relevant piece of the build log for the reference

Step 7 (test-stage1-compiler) failure: build (failure)
...
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/utils/lit/lit/llvm/config.py:530: note: using ld64.lld: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/ld64.lld
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/utils/lit/lit/llvm/config.py:530: note: using wasm-ld: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/wasm-ld
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/utils/lit/tests/lit.cfg:111: warning: Setting a timeout per test not supported. Requires the Python psutil module but it could not be found. Try installing it via pip or via your operating system's package manager.
 Some tests will be skipped and the --timeout command line argument will not work.
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/utils/lit/lit/llvm/config.py:530: note: using ld.lld: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/ld.lld
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/utils/lit/lit/llvm/config.py:530: note: using lld-link: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/lld-link
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/utils/lit/lit/llvm/config.py:530: note: using ld64.lld: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/ld64.lld
llvm-lit: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/utils/lit/lit/llvm/config.py:530: note: using wasm-ld: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/wasm-ld
-- Testing: 87753 tests, 72 workers --
Testing: 
FAIL: LLVM :: CodeGen/RISCV/rvv/expandload.ll (1 of 87753)
******************** TEST 'LLVM :: CodeGen/RISCV/rvv/expandload.ll' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 2
/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d,+m,+zbb /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/test/CodeGen/RISCV/rvv/expandload.ll -o -    | /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/FileCheck /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/test/CodeGen/RISCV/rvv/expandload.ll --check-prefixes=CHECK,CHECK-RV32
# executed command: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d,+m,+zbb /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/test/CodeGen/RISCV/rvv/expandload.ll -o -
# .---command stderr------------
# | LLVM ERROR: Cannot select: intrinsic %llvm.riscv.viota
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
# | Stack dump:
# | 0.	Program arguments: /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc -verify-machineinstrs -mtriple=riscv32 -mattr=+v,+d,+m,+zbb /home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/test/CodeGen/RISCV/rvv/expandload.ll -o -
# | 1.	Running pass 'Function Pass Manager' on module '/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/llvm-project/llvm/test/CodeGen/RISCV/rvv/expandload.ll'.
# | 2.	Running pass 'RISC-V DAG->DAG Pattern Instruction Selection' on function '@test_expandload_v1i8'
# |  #0 0x00005592c51f1330 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x3aa9330)
# |  #1 0x00005592c51ee52d SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
# |  #2 0x00007fdecb29e520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
# |  #3 0x00007fdecb2f29fc pthread_kill (/lib/x86_64-linux-gnu/libc.so.6+0x969fc)
# |  #4 0x00007fdecb29e476 gsignal (/lib/x86_64-linux-gnu/libc.so.6+0x42476)
# |  #5 0x00007fdecb2847f3 abort (/lib/x86_64-linux-gnu/libc.so.6+0x287f3)
# |  #6 0x00005592c1daa065 llvm::DisplayGraph(llvm::StringRef, bool, llvm::GraphProgram::Name) (.cold) GraphWriter.cpp:0:0
# |  #7 0x00005592c4f8a4b5 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x38424b5)
# |  #8 0x00005592c4f8e61d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x384661d)
# |  #9 0x00005592c31a71b7 llvm::RISCVDAGToDAGISel::Select(llvm::SDNode*) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x1a5f1b7)
# | #10 0x00005592c4f870e2 llvm::SelectionDAGISel::DoInstructionSelection() (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x383f0e2)
# | #11 0x00005592c4f93b16 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x384bb16)
# | #12 0x00005592c4f96595 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x384e595)
# | #13 0x00005592c4f981f5 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x38501f5)
# | #14 0x00005592c4f86c5d llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x383ec5d)
# | #15 0x00005592c420488d llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x2abc88d)
# | #16 0x00005592c47a6752 llvm::FPPassManager::runOnFunction(llvm::Function&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x305e752)
# | #17 0x00005592c47a6914 llvm::FPPassManager::runOnModule(llvm::Module&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x305e914)
# | #18 0x00005592c47a7342 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x305f342)
# | #19 0x00005592c1ef56b9 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
# | #20 0x00005592c1dbf1c6 main (/home/buildbot/as-worker-92/clang-with-thin-lto-ubuntu/build/stage1/bin/llc+0x6771c6)
# | #21 0x00007fdecb285d90 (/lib/x86_64-linux-gnu/libc.so.6+0x29d90)
# | #22 0x00007fdecb285e40 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x29e40)

llvm-ci · 2025-10-06T04:12:38Z

LLVM Buildbot has detected a new failure on builder arc-builder running on arc-worker while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/22957

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/X86/sse2-intrinsics-fast-isel.ll' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
/buildbot/worker/arc-folder/build/bin/llc < /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE
# executed command: /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
# .---command stderr------------
# | LLVM ERROR: Cannot select: intrinsic %llvm.x86.sse2.clflush
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
# | Stack dump:
# | 0.	Program arguments: /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
# | 1.	Running pass 'Function Pass Manager' on module '<stdin>'.
# | 2.	Running pass 'X86 DAG->DAG Instruction Selection' on function '@test_mm_clflush'
# |  #0 0x0000000002380428 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/buildbot/worker/arc-folder/build/bin/llc+0x2380428)
# |  #1 0x000000000237d335 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
# |  #2 0x00007fc5a2020630 __restore_rt sigaction.c:0:0
# |  #3 0x00007fc5a0d703d7 raise (/usr/lib64/libc.so.6+0x363d7)
# |  #4 0x00007fc5a0d71ac8 abort (/usr/lib64/libc.so.6+0x37ac8)
# |  #5 0x000000000072474d llvm::json::operator==(llvm::json::Value const&, llvm::json::Value const&) (.cold) JSON.cpp:0:0
# |  #6 0x0000000002108ca9 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/buildbot/worker/arc-folder/build/bin/llc+0x2108ca9)
# |  #7 0x000000000210d83a llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/buildbot/worker/arc-folder/build/bin/llc+0x210d83a)
# |  #8 0x0000000000968b37 (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) X86ISelDAGToDAG.cpp:0:0
# |  #9 0x00000000021044ef llvm::SelectionDAGISel::DoInstructionSelection() (/buildbot/worker/arc-folder/build/bin/llc+0x21044ef)
# | #10 0x00000000021143f8 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/buildbot/worker/arc-folder/build/bin/llc+0x21143f8)
# | #11 0x000000000211852e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/buildbot/worker/arc-folder/build/bin/llc+0x211852e)
# | #12 0x0000000002119185 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x2119185)
# | #13 0x0000000002103cff llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x2103cff)
# | #14 0x000000000121a4a7 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.0) MachineFunctionPass.cpp:0:0
# | #15 0x000000000189589b llvm::FPPassManager::runOnFunction(llvm::Function&) (/buildbot/worker/arc-folder/build/bin/llc+0x189589b)
# | #16 0x0000000001895c41 llvm::FPPassManager::runOnModule(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x1895c41)
# | #17 0x0000000001896855 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x1896855)
# | #18 0x0000000000805f22 compileModule(char**, llvm::LLVMContext&) llc.cpp:0:0
# | #19 0x000000000072cfe6 main (/buildbot/worker/arc-folder/build/bin/llc+0x72cfe6)
# | #20 0x00007fc5a0d5c555 __libc_start_main (/usr/lib64/libc.so.6+0x22555)
# | #21 0x00000000007fc146 _start (/buildbot/worker/arc-folder/build/bin/llc+0x7fc146)
# `-----------------------------
# error: command failed with exit status: -6
# executed command: /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE
# .---command stderr------------
# | /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll:399:14: error: SSE-LABEL: expected string not found in input
# | ; SSE-LABEL: test_mm_bsrli_si128:
# |              ^
# | <stdin>:170:21: note: scanning from here
# | test_mm_bslli_si128: # @test_mm_bslli_si128
# |                     ^
# | <stdin>:178:9: note: possible intended match here
# |  .globl test_mm_bsrli_si128 # 
# |         ^
...

llvm-ci · 2025-10-06T04:16:37Z

LLVM Buildbot has detected a new failure on builder clang-x64-windows-msvc running on windows-gcebot2 while building clang,llvm at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/63/builds/11049

Here is the relevant piece of the build log for the reference

Step 4 (annotate) failure: 'python ../llvm-zorg/zorg/buildbot/builders/annotated/clang-windows.py ...' (failure)
...
[327/329] Running the Clang regression tests
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:57: note: using lit tools: C:\Program Files\Git\usr\bin
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using clang: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\subst.py:126: note: Did not find cir-opt in C:\b\slave\clang-x64-windows-msvc\build\stage1\bin;C:\b\slave\clang-x64-windows-msvc\build\stage1\bin
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using ld.lld: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\ld.lld.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using lld-link: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\lld-link.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using ld64.lld: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\ld64.lld.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using wasm-ld: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\wasm-ld.exe
-- Testing: 23095 tests, 32 workers --
Testing:  0.. 10.. 20.. 30.. 40
FAIL: Clang :: CodeGen/SystemZ/builtins-systemz-zvector.c (828 of 23095)
******************** TEST 'Clang :: CodeGen/SystemZ/builtins-systemz-zvector.c' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 2
c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe -cc1 -internal-isystem C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu  -O2 -fzvector -flax-vector-conversions=none  -Wall -Wno-unused -Werror -emit-llvm C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c -o - | c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe' -cc1 -internal-isystem 'C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include' -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu -O2 -fzvector -flax-vector-conversions=none -Wall -Wno-unused -Werror -emit-llvm 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c' -o -
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe' 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c'
# RUN: at line 5
c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe -cc1 -internal-isystem C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu  -O2 -fzvector -flax-vector-conversions=none  -Wall -Wno-unused -Werror -S C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c -o - | c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c --check-prefix=CHECK-ASM
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe' -cc1 -internal-isystem 'C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include' -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu -O2 -fzvector -flax-vector-conversions=none -Wall -Wno-unused -Werror -S 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c' -o -
# .---command stderr------------
# | fatal error: error in backend: Cannot select: intrinsic %llvm.s390.vpklsf
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
# | Stack dump:
# | 0.	Program arguments: c:\\b\\slave\\clang-x64-windows-msvc\\build\\stage1\\bin\\clang.exe -cc1 -internal-isystem C:\\b\\slave\\clang-x64-windows-msvc\\build\\stage1\\lib\\clang\\22\\include -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu -O2 -fzvector -flax-vector-conversions=none -Wall -Wno-unused -Werror -S C:\\b\\slave\\clang-x64-windows-msvc\\llvm-project\\clang\\test\\CodeGen\\SystemZ\\builtins-systemz-zvector.c -o -
# | 1.	<eof> parser at end of file
# | 2.	Code generation
# | 3.	Running pass 'Function Pass Manager' on module 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c'.
# | 4.	Running pass 'SystemZ DAG->DAG Pattern Instruction Selection' on function '@test_core'
# `-----------------------------
# error: command failed with exit status: 70
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe' 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c' --check-prefix=CHECK-ASM
# .---command stderr------------
# | FileCheck error: '<stdin>' is empty.
# | FileCheck command line:  c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c --check-prefix=CHECK-ASM
# `-----------------------------
# error: command failed with exit status: 2

--

********************
Testing:  0.. 10.. 20.. 30.. 40.. 50..
FAIL: Clang :: CodeGen/SystemZ/builtins-systemz-zvector2.c (1809 of 23095)
******************** TEST 'Clang :: CodeGen/SystemZ/builtins-systemz-zvector2.c' FAILED ********************
Exit Code: 2

Step 8 (stage 1 check) failure: stage 1 check (failure)
...
[327/329] Running the Clang regression tests
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:57: note: using lit tools: C:\Program Files\Git\usr\bin
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using clang: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\subst.py:126: note: Did not find cir-opt in C:\b\slave\clang-x64-windows-msvc\build\stage1\bin;C:\b\slave\clang-x64-windows-msvc\build\stage1\bin
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using ld.lld: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\ld.lld.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using lld-link: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\lld-link.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using ld64.lld: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\ld64.lld.exe
llvm-lit.py: C:\b\slave\clang-x64-windows-msvc\llvm-project\llvm\utils\lit\lit\llvm\config.py:530: note: using wasm-ld: c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\wasm-ld.exe
-- Testing: 23095 tests, 32 workers --
Testing:  0.. 10.. 20.. 30.. 40
FAIL: Clang :: CodeGen/SystemZ/builtins-systemz-zvector.c (828 of 23095)
******************** TEST 'Clang :: CodeGen/SystemZ/builtins-systemz-zvector.c' FAILED ********************
Exit Code: 2

Command Output (stdout):
--
# RUN: at line 2
c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe -cc1 -internal-isystem C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu  -O2 -fzvector -flax-vector-conversions=none  -Wall -Wno-unused -Werror -emit-llvm C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c -o - | c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe' -cc1 -internal-isystem 'C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include' -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu -O2 -fzvector -flax-vector-conversions=none -Wall -Wno-unused -Werror -emit-llvm 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c' -o -
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe' 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c'
# RUN: at line 5
c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe -cc1 -internal-isystem C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu  -O2 -fzvector -flax-vector-conversions=none  -Wall -Wno-unused -Werror -S C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c -o - | c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c --check-prefix=CHECK-ASM
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\clang.exe' -cc1 -internal-isystem 'C:\b\slave\clang-x64-windows-msvc\build\stage1\lib\clang\22\include' -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu -O2 -fzvector -flax-vector-conversions=none -Wall -Wno-unused -Werror -S 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c' -o -
# .---command stderr------------
# | fatal error: error in backend: Cannot select: intrinsic %llvm.s390.vpklsf
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
# | Stack dump:
# | 0.	Program arguments: c:\\b\\slave\\clang-x64-windows-msvc\\build\\stage1\\bin\\clang.exe -cc1 -internal-isystem C:\\b\\slave\\clang-x64-windows-msvc\\build\\stage1\\lib\\clang\\22\\include -nostdsysteminc -target-cpu z13 -triple s390x-linux-gnu -O2 -fzvector -flax-vector-conversions=none -Wall -Wno-unused -Werror -S C:\\b\\slave\\clang-x64-windows-msvc\\llvm-project\\clang\\test\\CodeGen\\SystemZ\\builtins-systemz-zvector.c -o -
# | 1.	<eof> parser at end of file
# | 2.	Code generation
# | 3.	Running pass 'Function Pass Manager' on module 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c'.
# | 4.	Running pass 'SystemZ DAG->DAG Pattern Instruction Selection' on function '@test_core'
# `-----------------------------
# error: command failed with exit status: 70
# executed command: 'c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe' 'C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c' --check-prefix=CHECK-ASM
# .---command stderr------------
# | FileCheck error: '<stdin>' is empty.
# | FileCheck command line:  c:\b\slave\clang-x64-windows-msvc\build\stage1\bin\filecheck.exe C:\b\slave\clang-x64-windows-msvc\llvm-project\clang\test\CodeGen\SystemZ\builtins-systemz-zvector.c --check-prefix=CHECK-ASM
# `-----------------------------
# error: command failed with exit status: 2

--

********************
Testing:  0.. 10.. 20.. 30.. 40.. 50..
FAIL: Clang :: CodeGen/SystemZ/builtins-systemz-zvector2.c (1809 of 23095)
******************** TEST 'Clang :: CodeGen/SystemZ/builtins-systemz-zvector2.c' FAILED ********************
Exit Code: 2

llvm-ci · 2025-10-06T05:09:16Z

LLVM Buildbot has detected a new failure on builder sanitizer-x86_64-linux-android running on sanitizer-buildbot-android while building clang,llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/186/builds/12952

Here is the relevant piece of the build log for the reference

Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
[       OK ] AddressSanitizer.AtoiAndFriendsOOBTest (2262 ms)
[ RUN      ] AddressSanitizer.HasFeatureAddressSanitizerTest
[       OK ] AddressSanitizer.HasFeatureAddressSanitizerTest (0 ms)
[ RUN      ] AddressSanitizer.CallocReturnsZeroMem
[       OK ] AddressSanitizer.CallocReturnsZeroMem (19 ms)
[ DISABLED ] AddressSanitizer.DISABLED_TSDTest
[ RUN      ] AddressSanitizer.IgnoreTest
[       OK ] AddressSanitizer.IgnoreTest (0 ms)
[ RUN      ] AddressSanitizer.SignalTest
[       OK ] AddressSanitizer.SignalTest (158 ms)
[ RUN      ] AddressSanitizer.ReallocTest
[       OK ] AddressSanitizer.ReallocTest (30 ms)
[ RUN      ] AddressSanitizer.WrongFreeTest
[       OK ] AddressSanitizer.WrongFreeTest (126 ms)
[ RUN      ] AddressSanitizer.LongJmpTest
[       OK ] AddressSanitizer.LongJmpTest (0 ms)
[ RUN      ] AddressSanitizer.ThreadStackReuseTest
[       OK ] AddressSanitizer.ThreadStackReuseTest (8 ms)
[ DISABLED ] AddressSanitizer.DISABLED_MemIntrinsicUnalignedAccessTest
[ DISABLED ] AddressSanitizer.DISABLED_LargeFunctionSymbolizeTest
[ DISABLED ] AddressSanitizer.DISABLED_MallocFreeUnwindAndSymbolizeTest
[ RUN      ] AddressSanitizer.UseThenFreeThenUseTest
[       OK ] AddressSanitizer.UseThenFreeThenUseTest (134 ms)
[ RUN      ] AddressSanitizer.FileNameInGlobalReportTest
[       OK ] AddressSanitizer.FileNameInGlobalReportTest (109 ms)
[ DISABLED ] AddressSanitizer.DISABLED_StressStackReuseAndExceptionsTest
[ RUN      ] AddressSanitizer.MlockTest
[       OK ] AddressSanitizer.MlockTest (0 ms)
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadedTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowIn
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowLeft
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowRight
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFHigh
[ DISABLED ] AddressSanitizer.DISABLED_DemoOOM
[ DISABLED ] AddressSanitizer.DISABLED_DemoDoubleFreeTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoNullDerefTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoFunctionStaticTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoTooMuchMemoryTest
[ RUN      ] AddressSanitizer.LongDoubleNegativeTest
[       OK ] AddressSanitizer.LongDoubleNegativeTest (0 ms)
[----------] 19 tests from AddressSanitizer (27615 ms total)

[----------] Global test environment tear-down
[==========] 22 tests from 2 test suites ran. (27619 ms total)
[  PASSED  ] 22 tests.

  YOU HAVE 1 DISABLED TEST

Step 34 (run instrumented asan tests [aarch64/bluejay-userdebug/TQ3A.230805.001]) failure: run instrumented asan tests [aarch64/bluejay-userdebug/TQ3A.230805.001] (failure)
...
[ RUN      ] AddressSanitizer.HasFeatureAddressSanitizerTest
[       OK ] AddressSanitizer.HasFeatureAddressSanitizerTest (0 ms)
[ RUN      ] AddressSanitizer.CallocReturnsZeroMem
[       OK ] AddressSanitizer.CallocReturnsZeroMem (19 ms)
[ DISABLED ] AddressSanitizer.DISABLED_TSDTest
[ RUN      ] AddressSanitizer.IgnoreTest
[       OK ] AddressSanitizer.IgnoreTest (0 ms)
[ RUN      ] AddressSanitizer.SignalTest
[       OK ] AddressSanitizer.SignalTest (158 ms)
[ RUN      ] AddressSanitizer.ReallocTest
[       OK ] AddressSanitizer.ReallocTest (30 ms)
[ RUN      ] AddressSanitizer.WrongFreeTest
[       OK ] AddressSanitizer.WrongFreeTest (126 ms)
[ RUN      ] AddressSanitizer.LongJmpTest
[       OK ] AddressSanitizer.LongJmpTest (0 ms)
[ RUN      ] AddressSanitizer.ThreadStackReuseTest
[       OK ] AddressSanitizer.ThreadStackReuseTest (8 ms)
[ DISABLED ] AddressSanitizer.DISABLED_MemIntrinsicUnalignedAccessTest
[ DISABLED ] AddressSanitizer.DISABLED_LargeFunctionSymbolizeTest
[ DISABLED ] AddressSanitizer.DISABLED_MallocFreeUnwindAndSymbolizeTest
[ RUN      ] AddressSanitizer.UseThenFreeThenUseTest
[       OK ] AddressSanitizer.UseThenFreeThenUseTest (134 ms)
[ RUN      ] AddressSanitizer.FileNameInGlobalReportTest
[       OK ] AddressSanitizer.FileNameInGlobalReportTest (109 ms)
[ DISABLED ] AddressSanitizer.DISABLED_StressStackReuseAndExceptionsTest
[ RUN      ] AddressSanitizer.MlockTest
[       OK ] AddressSanitizer.MlockTest (0 ms)
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadedTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoThreadStackTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowIn
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowLeft
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFLowRight
[ DISABLED ] AddressSanitizer.DISABLED_DemoUAFHigh
[ DISABLED ] AddressSanitizer.DISABLED_DemoOOM
[ DISABLED ] AddressSanitizer.DISABLED_DemoDoubleFreeTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoNullDerefTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoFunctionStaticTest
[ DISABLED ] AddressSanitizer.DISABLED_DemoTooMuchMemoryTest
[ RUN      ] AddressSanitizer.LongDoubleNegativeTest
[       OK ] AddressSanitizer.LongDoubleNegativeTest (0 ms)
[----------] 19 tests from AddressSanitizer (27615 ms total)

[----------] Global test environment tear-down
[==========] 22 tests from 2 test suites ran. (27619 ms total)
[  PASSED  ] 22 tests.

  YOU HAVE 1 DISABLED TEST
program finished with exit code 0
elapsedTime=4902.163399

…lvm#160494) This change adds LLVM intrinsics and clang builtins for the `cvt` RS rounding mode instruction variants. Tests are added in `convert-sm103a.ll` and verified through ptxas-13.0.

Wolfram70 requested review from Artem-B and durga4github September 24, 2025 10:45

Wolfram70 self-assigned this Sep 24, 2025

llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" backend:NVPTX llvm:ir labels Sep 24, 2025

Wolfram70 force-pushed the dev/Wolfram70/cvt-rs branch from c9950c7 to 23f472a Compare September 24, 2025 11:09