diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 468121f7d20ab..71fe555d6f689 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5204,6 +5204,18 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate { let Prototype = "T(unsigned int, T)"; } +def HLSLDerivCoarseX: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_deriv_coarse_x"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def HLSLDerivCoarseY: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_deriv_coarse_y"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + // Builtins for XRay. def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td index d2ef6f99a0502..95f73cf4effbc 100644 --- a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td +++ b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td @@ -21,3 +21,4 @@ def subgroup_local_invocation_id : SPIRVBuiltin<"uint32_t()", [NoThrow, Const]>; def distance : SPIRVBuiltin<"void(...)", [NoThrow, Const]>; def length : SPIRVBuiltin<"void(...)", [NoThrow, Const]>; def smoothstep : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>; +def fwidth : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 6c0fc8d7f07be..5ed77adbb1d16 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -532,6 +532,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.frac"); } + case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_x: { + Value 
*Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable( + "deriv coarse x operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), llvm::Intrinsic::dx_deriv_coarse_x, + ArrayRef{Op0}, nullptr, "hlsl.deriv.coarse.x"); + } + case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_y: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable( + "deriv coarse y operand must have a float representation"); + return Builder.CreateIntrinsic( + /*ReturnType=*/Op0->getType(), llvm::Intrinsic::dx_deriv_coarse_y, + ArrayRef{Op0}, nullptr, "hlsl.deriv.coarse.y"); + } case Builtin::BI__builtin_hlsl_elementwise_isinf: { Value *Op0 = EmitScalarExpr(E->getArg(0)); llvm::Type *Xty = Op0->getType(); diff --git a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp index 243aad8bf7083..43b05a128e876 100644 --- a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp @@ -151,6 +151,11 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID, Intrinsic::spv_global_offset, ArrayRef{EmitScalarExpr(E->getArg(0))}, nullptr, "spv.global.offset"); + case SPIRV::BI__builtin_spirv_fwidth: + return Builder.CreateIntrinsic( + /*ReturnType=*/getTypes().ConvertType(E->getType()), + Intrinsic::spv_fwidth, ArrayRef{EmitScalarExpr(E->getArg(0))}, + nullptr, "spv.fwidth"); } return nullptr; } diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h index c877234479ad1..01f32596ad554 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h @@ -148,6 +148,31 @@ template constexpr T ldexp_impl(T X, T Exp) { return exp2(Exp) * X; } +template constexpr T fwidth_impl(T input) { +#if (__has_builtin(__builtin_spirv_fwidth)) + return
__builtin_spirv_fwidth(input); +#else + T derivCoarseX = __builtin_hlsl_elementwise_deriv_coarse_x(input); + derivCoarseX = abs(derivCoarseX); + T derivCoarseY = __builtin_hlsl_elementwise_deriv_coarse_y(input); + derivCoarseY = abs(derivCoarseY); + return derivCoarseX + derivCoarseY; +#endif +} + +template +constexpr vector fwidth_vec_impl(vector input) { +#if (__has_builtin(__builtin_spirv_fwidth)) + return __builtin_spirv_fwidth(input); +#else + vector derivCoarseX = __builtin_hlsl_elementwise_deriv_coarse_x(input); + derivCoarseX = abs(derivCoarseX); + vector derivCoarseY = __builtin_hlsl_elementwise_deriv_coarse_y(input); + derivCoarseY = abs(derivCoarseY); + return derivCoarseX + derivCoarseY; +#endif +} + } // namespace __detail } // namespace hlsl diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 5ba5bfb9abde0..1e01828fd3ba1 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -605,5 +605,30 @@ smoothstep(__detail::HLSL_FIXED_VECTOR Min, return __detail::smoothstep_vec_impl(Min, Max, X); } +//===----------------------------------------------------------------------===// +// fwidth builtin +//===----------------------------------------------------------------------===// + +/// \fn T fwidth(T input) +/// \brief Computes the sum of the absolute values of the partial derivatives +/// with regard to the x and y screen space coordinates. +/// \param input [in] The floating-point scalar or vector to process. +/// +/// The return value is a floating-point scalar or vector where each element +/// holds the computation of the matching element in the input.
+ +template +const inline __detail::enable_if_t< + __detail::is_arithmetic::Value && __detail::is_same::value, T> +fwidth(T input) { + return __detail::fwidth_impl(input); +} + +template +const inline __detail::HLSL_FIXED_VECTOR +fwidth(__detail::HLSL_FIXED_VECTOR input) { + return __detail::fwidth_vec_impl(input); +} + } // namespace hlsl #endif //_HLSL_HLSL_INTRINSICS_H_ diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 940d510b4cc02..fe621d62988fe 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3080,7 +3080,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case Builtin::BI__builtin_hlsl_elementwise_degrees: case Builtin::BI__builtin_hlsl_elementwise_radians: case Builtin::BI__builtin_hlsl_elementwise_rsqrt: - case Builtin::BI__builtin_hlsl_elementwise_frac: { + case Builtin::BI__builtin_hlsl_elementwise_frac: + case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_x: + case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_y: { if (SemaRef.checkArgCount(TheCall, 1)) return true; if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, diff --git a/clang/lib/Sema/SemaSPIRV.cpp b/clang/lib/Sema/SemaSPIRV.cpp index c8ea0d09c4081..0e78cff9c1774 100644 --- a/clang/lib/Sema/SemaSPIRV.cpp +++ b/clang/lib/Sema/SemaSPIRV.cpp @@ -360,6 +360,24 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(const TargetInfo &TI, case SPIRV::BI__builtin_spirv_generic_cast_to_ptr_explicit: { return checkGenericCastToPtr(SemaRef, TheCall); } + case SPIRV::BI__builtin_spirv_fwidth: { + if (SemaRef.checkArgCount(TheCall, 1)) + return true; + + // Check if first argument has floating representation + ExprResult A = TheCall->getArg(0); + QualType ArgTyA = A.get()->getType(); + if (!ArgTyA->hasFloatingRepresentation()) { + SemaRef.Diag(A.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << /* ordinal */ 1 << /* scalar or vector */ 5 << /* no int */ 0 + << /* fp */ 1 << ArgTyA; + return true; + } + 
+ QualType RetTy = ArgTyA; + TheCall->setType(RetTy); + break; + } } return false; } diff --git a/clang/test/CodeGenSPIRV/Builtins/fwidth.c b/clang/test/CodeGenSPIRV/Builtins/fwidth.c new file mode 100644 index 0000000000000..027b80500904d --- /dev/null +++ b/clang/test/CodeGenSPIRV/Builtins/fwidth.c @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -O1 -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s + +typedef _Float16 half; +typedef half half2 __attribute__((ext_vector_type(2))); +typedef half half3 __attribute__((ext_vector_type(3))); +typedef half half4 __attribute__((ext_vector_type(4))); +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); + +// CHECK: [[fwidth0:%.*]] = tail call half @llvm.spv.fwidth.f16(half {{%.*}}) +// CHECK: ret half [[fwidth0]] +half test_fwidth_half(half X) { return __builtin_spirv_fwidth(X); } + +// CHECK: [[fwidth0:%.*]] = tail call <2 x half> @llvm.spv.fwidth.v2f16(<2 x half> {{%.*}}) +// CHECK: ret <2 x half> [[fwidth0]] +half2 test_fwidth_half2(half2 X) { return __builtin_spirv_fwidth(X); } + +// CHECK: [[fwidth0:%.*]] = tail call <3 x half> @llvm.spv.fwidth.v3f16(<3 x half> {{%.*}}) +// CHECK: ret <3 x half> [[fwidth0]] +half3 test_fwidth_half3(half3 X) { return __builtin_spirv_fwidth(X); } + +// CHECK: [[fwidth0:%.*]] = tail call <4 x half> @llvm.spv.fwidth.v4f16(<4 x half> {{%.*}}) +// CHECK: ret <4 x half> [[fwidth0]] +half4 test_fwidth_half4(half4 X) { return __builtin_spirv_fwidth(X); } + +// CHECK: [[fwidth0:%.*]] = tail call float @llvm.spv.fwidth.f32(float {{%.*}}) +// CHECK: ret float [[fwidth0]] +float test_fwidth_float(float X) { return __builtin_spirv_fwidth(X); } + +// CHECK: [[fwidth1:%.*]] = tail call <2 x float> @llvm.spv.fwidth.v2f32(<2 x float> {{%.*}}) +// CHECK: ret <2 x float> [[fwidth1]] +float2 test_fwidth_float2(float2 X) { return __builtin_spirv_fwidth(X); } + +// CHECK: 
[[fwidth2:%.*]] = tail call <3 x float> @llvm.spv.fwidth.v3f32(<3 x float> {{%.*}}) +// CHECK: ret <3 x float> [[fwidth2]] +float3 test_fwidth_float3(float3 X) { return __builtin_spirv_fwidth(X); } + +// CHECK: [[fwidth3:%.*]] = tail call <4 x float> @llvm.spv.fwidth.v4f32(<4 x float> {{%.*}}) +// CHECK: ret <4 x float> [[fwidth3]] +float4 test_fwidth_float4(float4 X) { return __builtin_spirv_fwidth(X); } diff --git a/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c b/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c new file mode 100644 index 0000000000000..44cdd819e4332 --- /dev/null +++ b/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 %s -triple spirv-pc-vulkan-compute -verify + +typedef float float2 __attribute__((ext_vector_type(2))); + +void test_too_few_arg() +{ + return __builtin_spirv_fwidth(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float test_too_many_arg(float p0) { + return __builtin_spirv_fwidth(p0, p0); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_int_scalar_inputs(int p0) { + return __builtin_spirv_fwidth(p0); + // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}} +} + +float test_mismatched_return(float2 p0) { + return __builtin_spirv_fwidth(p0); + // expected-error@-1 {{returning 'float2' (vector of 2 'float' values) from a function with incompatible result type 'float'}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 570d6bc35cbd0..1a4a0fc2364bd 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -162,6 +162,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0> [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def 
int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>; +def int_dx_deriv_coarse_x : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_deriv_coarse_y : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 823c491e1bfee..235568f4b20eb 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -132,6 +132,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>; + def int_spv_fwidth : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 228114c5c24b2..02360cdc859fc 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -922,6 +922,24 @@ def Discard : DXILOp<82, discard> { let stages = [Stages]; } +def DerivCoarseX : DXILOp<83, unary> { + let Doc = "computes the rate of change per 
stamp in x direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + +def DerivCoarseY : DXILOp<84, unary> { + let Doc = "computes the rate of change per stamp in y direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let intrinsics = [IntrinSelect]; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 68fd3e0bc74c7..4854a3e676918 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -48,6 +48,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_firstbitshigh: case Intrinsic::dx_firstbituhigh: case Intrinsic::dx_frac: + case Intrinsic::dx_deriv_coarse_x: + case Intrinsic::dx_deriv_coarse_y: case Intrinsic::dx_isinf: case Intrinsic::dx_isnan: case Intrinsic::dx_rsqrt: diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 1aadd9df189a8..1293c4eb4a3a4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -179,6 +179,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectSplatVector(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectFwidth(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I) const; + bool selectCmp(Register ResVReg, const SPIRVType *ResType, unsigned comparisonOpcode, MachineInstr &I) const; bool selectDiscard(Register ResVReg, const SPIRVType *ResType, @@ -2615,6 +2618,15 @@ bool SPIRVInstructionSelector::selectDiscard(Register ResVReg, .constrainAllUses(TII, TRI, RBI); } +bool 
SPIRVInstructionSelector::selectFwidth(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I) const { + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFwidth)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(2).getReg()).constrainAllUses(TII, TRI, RBI); +} + bool SPIRVInstructionSelector::selectCmp(Register ResVReg, const SPIRVType *ResType, unsigned CmpOpc, @@ -3451,6 +3463,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_discard: { return selectDiscard(ResVReg, ResType, I); } + case Intrinsic::spv_fwidth: { + return selectFwidth(ResVReg, ResType, I); + } case Intrinsic::modf: { return selectModf(ResVReg, ResType, I); } diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll new file mode 100644 index 0000000000000..49e584c5c158e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll @@ -0,0 +1,43 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for deriv_coarse_x are generated for +; half, float, and <4 x float> (scalarized) arguments. + + +define noundef half @deriv_coarse_x_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}}) +entry: + %dx.deriv.coarse.x = call half @llvm.dx.deriv.coarse.x.f16(half %a) + ret half %dx.deriv.coarse.x +} + +define noundef float @deriv_coarse_x_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}}) +entry: + %dx.deriv.coarse.x = call float @llvm.dx.deriv.coarse.x.f32(float %a) + ret float %dx.deriv.coarse.x +} + +define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x
float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.deriv.coarse.x = call <4 x float> @llvm.dx.deriv.coarse.x.v4f32(<4 x float> %a) + ret <4 x float> %dx.deriv.coarse.x +} + +declare half @llvm.dx.deriv.coarse.x.f16(half) +declare float @llvm.dx.deriv.coarse.x.f32(float) +declare <4 x float> @llvm.dx.deriv.coarse.x.v4f32(<4 x float>) + diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll new file mode 100644 index 0000000000000..eab495c5f7c6a --- /dev/null +++ b/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation deriv_coarse_x does not support double overload type +; CHECK: in function deriv_coarse_x +; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @deriv_coarse_x_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.deriv_coarse_x = call double @llvm.dx.deriv.coarse.x.f64(double %0) + ret double %dx.deriv_coarse_x +} diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_y.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_y.ll new file mode 100644 index 
0000000000000..76671991cf46b --- /dev/null +++ b/llvm/test/CodeGen/DirectX/deriv_coarse_y.ll @@ -0,0 +1,43 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for deriv_coarse_y are generated for +; half, float, and <4 x float> (scalarized) arguments. + + +define noundef half @deriv_coarse_y_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}}) +entry: + %dx.deriv.coarse.y = call half @llvm.dx.deriv.coarse.y.f16(half %a) + ret half %dx.deriv.coarse.y +} + +define noundef float @deriv_coarse_y_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}}) +entry: + %dx.deriv.coarse.y = call float @llvm.dx.deriv.coarse.y.f32(float %a) + ret float %dx.deriv.coarse.y +} + +define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.deriv.coarse.y = call <4 x float> @llvm.dx.deriv.coarse.y.v4f32(<4 x float> %a) + ret <4 x float> %dx.deriv.coarse.y +} + +declare half
@llvm.dx.deriv.coarse.y.f16(half) +declare float @llvm.dx.deriv.coarse.y.f32(float) +declare <4 x float> @llvm.dx.deriv.coarse.y.v4f32(<4 x float>) + diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll new file mode 100644 index 0000000000000..6f48b9e88f5b9 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation deriv_coarse_y does not support double overload type +; CHECK: in function deriv_coarse_y +; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @deriv_coarse_y_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.deriv_coarse_y = call double @llvm.dx.deriv.coarse.y.f64(double %0) + ret double %dx.deriv_coarse_y +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll new file mode 100644 index 0000000000000..45ebbdda4494c --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll @@ -0,0 +1,44 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @fwidth_float(float noundef %a) { +entry: +; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpFwidth %[[#float_32]] %[[#float_32_arg]] + %elt.fwidth = call float @llvm.spv.fwidth.f32(float %a) + ret float %elt.fwidth 
+} + +define noundef half @fwidth_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#]] = OpFwidth %[[#float_16]] %[[#float_16_arg]] + %elt.fwidth = call half @llvm.spv.fwidth.f16(half %a) + ret half %elt.fwidth +} + +define noundef <4 x float> @fwidth_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpFwidth %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.fwidth = call <4 x float> @llvm.spv.fwidth.v4f32(<4 x float> %a) + ret <4 x float> %elt.fwidth +} + +define noundef <4 x half> @fwidth_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#]] = OpFwidth %[[#vec4_float_16]] %[[#vec4_float_16_arg]] + %elt.fwidth = call <4 x half> @llvm.spv.fwidth.v4f16(<4 x half> %a) + ret <4 x half> %elt.fwidth +} + +declare float @llvm.spv.fwidth.f32(float) +declare half @llvm.spv.fwidth.f16(half) +