Add DXIL-specific codegen for splitdouble

joaosaffran-zz · joaosaffran-zz · commit 91bcdc450123 · 2024-10-21T22:46:48.000Z
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18960,67 +18960,41 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
             E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
             E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
            "asuint operands types mismatch");
-
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     const HLSLOutArgExpr *OutArg1 = dyn_cast<HLSLOutArgExpr>(E->getArg(1));
     const HLSLOutArgExpr *OutArg2 = dyn_cast<HLSLOutArgExpr>(E->getArg(2));
 
-    auto emitSplitDouble =
-        [](CGBuilderTy *Builder, llvm::Intrinsic::ID intrId, llvm::Value *arg,
-           llvm::Type *retType) -> std::pair<Value *, Value *> {
-      CallInst *CI =
-          Builder->CreateIntrinsic(retType, intrId,
-                                   {arg}, nullptr, "hlsl.asuint");
-
-      Value *arg0 = Builder->CreateExtractValue(CI, 0);
-      Value *arg1 = Builder->CreateExtractValue(CI, 1);
-
-      return std::make_pair(arg0, arg1);
-    };
-
     CallArgList Args;
     auto [Op1BaseLValue, Op1TmpLValue] =
         EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
     auto [Op2BaseLValue, Op2TmpLValue] =
         EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
 
-    llvm::StructType *retType = llvm::StructType::get(Int32Ty, Int32Ty);
+    if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) {
 
-    if (!Op0->getType()->isVectorTy()) {
-      auto [arg0, arg1] = emitSplitDouble(&Builder, CGM.getHLSLRuntime().getSplitdoubleIntrinsic(), Op0, retType);
-
-      Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
-      auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
-
-      EmitWritebacks(*this, Args);
-      return s;
-    }
+      llvm::StructType *retType = llvm::StructType::get(Int32Ty, Int32Ty);
 
-    auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>();
+      if (Op0->getType()->isVectorTy()) {
+        auto *Op0VecTy = E->getArg(0)->getType()->getAs<VectorType>();
 
-    llvm::VectorType *i32VecTy = llvm::VectorType::get(
-        Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
+        llvm::VectorType *i32VecTy = llvm::VectorType::get(
+            Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
+        retType = llvm::StructType::get(i32VecTy, i32VecTy);
+      }
 
-    std::pair<Value *, Value *> inserts = std::make_pair(nullptr, nullptr);
+      CallInst *CI =
+          Builder.CreateIntrinsic(retType, Intrinsic::dx_splitdouble, {Op0},
+                                  nullptr, "hlsl.splitdouble");
 
-    for (uint64_t idx = 0; idx < Op0VecTy->getNumElements(); idx++) {
-      Value *op = Builder.CreateExtractElement(Op0, idx);
+      Value *arg0 = Builder.CreateExtractValue(CI, 0);
+      Value *arg1 = Builder.CreateExtractValue(CI, 1);
 
-      auto [arg0, arg1] = emitSplitDouble(&Builder, CGM.getHLSLRuntime().getSplitdoubleIntrinsic(), op, retType);
+      Builder.CreateStore(arg0, Op1TmpLValue.getAddress());
+      auto *s = Builder.CreateStore(arg1, Op2TmpLValue.getAddress());
 
-      if (idx == 0) {
-        inserts.first = Builder.CreateInsertElement(i32VecTy, arg0, idx);
-        inserts.second = Builder.CreateInsertElement(i32VecTy, arg1, idx);
-      } else {
-        inserts.first = Builder.CreateInsertElement(inserts.first, arg0, idx);
-        inserts.second = Builder.CreateInsertElement(inserts.second, arg1, idx);
-      }
+      EmitWritebacks(*this, Args);
+      return s;
     }
-
-    Builder.CreateStore(inserts.first, Op1TmpLValue.getAddress());
-    auto *s = Builder.CreateStore(inserts.second, Op2TmpLValue.getAddress());
-    EmitWritebacks(*this, Args);
-    return s;
   }
   }
   return nullptr;
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -88,7 +88,6 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
   GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
-  GENERATE_HLSL_INTRINSIC_FUNCTION(Splitdouble, splitdouble);
   GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
diff --git a/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl
@@ -1,33 +1,23 @@
 // RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv--vulkan-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefix=SPIRV
 
 
 
-// CHECK: define {{.*}} float {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
-// CHECK: [[VALRET:%hlsl.asuint.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALD]])
+// CHECK: define {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
+// CHECK: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALD]])
 // CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0
 // CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1
-// SPIRV: define spir_func {{.*}} float {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
-// SPIRV-NOT: @llvm.dx.splitdouble
-// SPIRV: [[REG:%.*]] = load double, ptr [[VALD]].addr
-// SPIRV: call spir_func void {{.*}}asuint{{.*}}(double {{.*}} [[REG]], {{.*}})
-float test_scalar(double D) {
+uint test_scalar(double D) {
   uint A, B;
   asuint(D, A, B);
   return A + B;
 }
 
 
-// CHECK: define {{.*}} <3 x float> {{.*}}test_vector{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
-// CHECK-COUNT-3: [[VALREG:%.*]] = extractelement <3 x double> [[VALD]], i64 [[VALIDX:[0-3]]]
-// CHECK-NEXT: [[VALRET:%hlsl.asuint.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALREG]])
-// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0
-// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1
-// SPIRV: define spir_func {{.*}} <3 x float> {{.*}}test_vector{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
-// SPIRV-NOT: @llvm.dx.splitdouble
-// SPIRV: [[REG:%.*]] = load <3 x double>, ptr [[VALD]].addr
-// SPIRV: call spir_func void {{.*}}asuint{{.*}}(<3 x double> {{.*}} [[REG]], {{.*}})
-float3 test_vector(double3 D) {
+// CHECK: define {{.*}} <3 x i32> {{.*}}test_vector{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
+// CHECK: [[VALRET:%.*]] = {{.*}} call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> [[VALD]])
+// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 1
+uint3 test_vector(double3 D) {
   uint3 A, B;
   asuint(D, A, B);
   return A + B;
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -95,6 +95,6 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
 
 def int_dx_splitdouble : DefaultAttrsIntrinsic<
     [llvm_anyint_ty, LLVMMatchType<0>], 
-    [llvm_double_ty], 
+    [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], 
     [IntrNoMem, IntrWillReturn]>;
 }
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -493,6 +493,12 @@ class OpLowerer {
 
       Value *Arg0 = CI->getArgOperand(0);
 
+      if (Arg0->getType()->isVectorTy()) {
+        return make_error<StringError>(
+            "splitdouble doesn't support lowering vector types.",
+            inconvertibleErrorCode());
+      }
+
       Type *NewRetTy = OpBuilder.getResSplitDoubleType(M.getContext());
 
       std::array<Value *, 1> Args{Arg0};
diff --git a/llvm/test/CodeGen/DirectX/splitdouble.ll b/llvm/test/CodeGen/DirectX/splitdouble.ll
@@ -1,50 +1,17 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
-; RUN: opt -S --scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+; RUN: opt -S --scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
 
-; Make sure DXILOpLowering is correctly generating the dxil op code call, with and without scalarizer.
+; Make sure DXILOpLowering correctly generates the DXIL splitDouble op, with and without the scalarizer.
 
-; CHECK-LABEL: define noundef float @test_scalar_double_split
-define noundef float @test_scalar_double_split(double noundef %D) local_unnamed_addr {
+; CHECK-LABEL: define noundef i32 @test_scalar_double_split
+define noundef i32 @test_scalar_double_split(double noundef %D) local_unnamed_addr {
 entry:
   ; CHECK: [[CALL:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double %D)
   ; CHECK-NEXT:extractvalue %dx.types.splitdouble [[CALL]], {{[0-1]}}
   ; CHECK-NEXT:extractvalue %dx.types.splitdouble [[CALL]], {{[0-1]}}
-  %hlsl.asuint = tail call { i32, i32 } @llvm.dx.splitdouble.i32(double %D)
-  %0 = extractvalue { i32, i32 } %hlsl.asuint, 0
-  %1 = extractvalue { i32, i32 } %hlsl.asuint, 1
+  %hlsl.splitdouble = call { i32, i32 } @llvm.dx.splitdouble.i32(double %D)
+  %0 = extractvalue { i32, i32 } %hlsl.splitdouble, 0
+  %1 = extractvalue { i32, i32 } %hlsl.splitdouble, 1
   %add = add i32 %0, %1
-  %conv = uitofp i32 %add to float
-  ret float %conv
-}
-
-declare <2 x i32> @llvm.dx.splitdouble.v2i32(double) #1
-
-
-; CHECK-LABEL: define noundef <3 x float> @test_vector_double_split
-define noundef <3 x float> @test_vector_double_split(<3 x double> noundef %D) local_unnamed_addr {
-entry:
-  %0 = extractelement <3 x double> %D, i64 0
-  ; CHECK-COUNT-3: [[CALL:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double {{.*}})
-  ; CHECK-NEXT:extractvalue %dx.types.splitdouble [[CALL]], {{[0-1]}}
-  ; CHECK-NEXT:extractvalue %dx.types.splitdouble [[CALL]], {{[0-1]}}
-  %hlsl.asuint = tail call { i32, i32 } @llvm.dx.splitdouble.i32(double %0)
-  %1 = extractvalue { i32, i32 } %hlsl.asuint, 0
-  %2 = extractvalue { i32, i32 } %hlsl.asuint, 1
-  %3 = insertelement <3 x i32> poison, i32 %1, i64 0
-  %4 = insertelement <3 x i32> poison, i32 %2, i64 0
-  %5 = extractelement <3 x double> %D, i64 1
-  %hlsl.asuint2 = tail call { i32, i32 } @llvm.dx.splitdouble.i32(double %5)
-  %6 = extractvalue { i32, i32 } %hlsl.asuint2, 0
-  %7 = extractvalue { i32, i32 } %hlsl.asuint2, 1
-  %8 = insertelement <3 x i32> %3, i32 %6, i64 1
-  %9 = insertelement <3 x i32> %4, i32 %7, i64 1
-  %10 = extractelement <3 x double> %D, i64 2
-  %hlsl.asuint3 = tail call { i32, i32 } @llvm.dx.splitdouble.i32(double %10)
-  %11 = extractvalue { i32, i32 } %hlsl.asuint3, 0
-  %12 = extractvalue { i32, i32 } %hlsl.asuint3, 1
-  %13 = insertelement <3 x i32> %8, i32 %11, i64 2
-  %14 = insertelement <3 x i32> %9, i32 %12, i64 2
-  %add = add <3 x i32> %13, %14
-  %conv = uitofp <3 x i32> %add to <3 x float>
-  ret <3 x float> %conv
+  ret i32 %add
 }
diff --git a/llvm/test/CodeGen/DirectX/splitdouble_error.ll b/llvm/test/CodeGen/DirectX/splitdouble_error.ll
@@ -0,0 +1,16 @@
+; RUN: not opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation splitdouble doesn't support vector types.
+; CHECK: in function test_vector_double_split
+; CHECK-SAME: splitdouble doesn't support lowering vector types.
+
+define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %D) local_unnamed_addr {
+entry:
+  %hlsl.splitdouble = tail call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %D)
+  %0 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.splitdouble, 0
+  %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.splitdouble, 1
+  %add = add <3 x i32> %0, %1
+  ret <3 x i32> %add
+}
+
+declare { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double>)

Original file line number	Diff line number	Diff line change
`@@ -95,6 +95,6 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>`
`95`	`95`
`96`	`96`	`def int_dx_splitdouble : DefaultAttrsIntrinsic<`
`97`	`97`	`[llvm_anyint_ty, LLVMMatchType<0>],`
`98`		`- [llvm_double_ty],`
	`98`	`+ [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>],`
`99`	`99`	`[IntrNoMem, IntrWillReturn]>;`
`100`	`100`	`}`