[DirectX] only allow intrinsics defined in DXIL.td (#128613)

farzonl · web-flow · commit 0be3f134c3b0 · 2025-02-25T16:09:41.000-05:00
Fixes #128071. The current behavior lets intrinsics that don't map to a DXILOP slip through, and nothing catches them until we hit the DXIL validator. This change fails earlier, so we no longer encode invalid LLVM intrinsics that reach DXIL op lowering via clang builtins such as `__builtin_reduce_and`. Example: https://hlsl.godbolt.org/z/13rPj18vn
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -770,8 +770,20 @@ class OpLowerer {
         continue;
       Intrinsic::ID ID = F.getIntrinsicID();
       switch (ID) {
-      default:
+      // NOTE: Skip dx_resource_casthandle here. Calls to it are
+      // resolved after this loop in cleanupHandleCasts.
+      case Intrinsic::dx_resource_casthandle:
+      // NOTE: llvm.dbg.value is supported as is in DXIL.
+      case Intrinsic::dbg_value:
+      case Intrinsic::not_intrinsic:
         continue;
+      default: {
+        DiagnosticInfoUnsupported Diag(
+            F, "Unsupported intrinsic for DXIL lowering");
+        M.getContext().diagnose(Diag);
+        HasErrors |= true;
+        break;
+      }
 #define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
   case Intrin:                                                                 \
     HasErrors |= replaceFunctionWithOp(                                        \
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -286,7 +286,7 @@ declare <3 x half> @llvm.dx.nclamp.v3f16(<3 x half>, <3 x half>, <3 x half>)
 declare <4 x float> @llvm.dx.nclamp.v4f32(<4 x float>, <4 x float>, <4 x float>)
 declare <2 x double> @llvm.dx.nclamp.v2f64(<2 x double>, <2 x double>, <2 x double>)
 declare <4 x i32> @llvm.dx.sclamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare <3 x i16> @llvm.dx.uclamp.v3i32(<3 x i16>, <3 x i32>, <3 x i16>)
+declare <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
 declare <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 declare <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
 
diff --git a/llvm/test/CodeGen/DirectX/discard.ll b/llvm/test/CodeGen/DirectX/discard.ll
@@ -1,4 +1,4 @@
-; RUN: opt -passes='function(scalarizer),module(dxil-op-lower,dxil-intrinsic-expansion)' -S -mtriple=dxil-pc-shadermodel6.3-pixel %s | FileCheck %s
+; RUN: opt -passes='function(scalarizer),module(dxil-intrinsic-expansion,dxil-op-lower)' -S -mtriple=dxil-pc-shadermodel6.3-pixel %s | FileCheck %s
 
 ; CHECK-LABEL: define void @test_scalar
 ; CHECK: call void @dx.op.discard(i32 82, i1 %0)
diff --git a/llvm/test/CodeGen/DirectX/unsupported_intrinsic.ll b/llvm/test/CodeGen/DirectX/unsupported_intrinsic.ll
@@ -0,0 +1,11 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; CHECK: error: <unknown>:0:0: in function llvm.vector.reduce.and.v4i32 i32 (<4 x i32>): Unsupported intrinsic for DXIL lowering
+define i32 @fn_and(<4 x i32> %0) local_unnamed_addr #0 {
+  %2 = tail call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %0)
+  ret i32 %2
+}
+
+declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
+
+attributes #0 = { convergent norecurse nounwind "hlsl.export"}

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-; RUN: opt -passes='function(scalarizer),module(dxil-op-lower,dxil-intrinsic-expansion)' -S -mtriple=dxil-pc-shadermodel6.3-pixel %s \| FileCheck %s`
	`1`	`+; RUN: opt -passes='function(scalarizer),module(dxil-intrinsic-expansion,dxil-op-lower)' -S -mtriple=dxil-pc-shadermodel6.3-pixel %s \| FileCheck %s`
`2`	`2`
`3`	`3`	`; CHECK-LABEL: define void @test_scalar`
`4`	`4`	`; CHECK: call void @dx.op.discard(i32 82, i1 %0)`