diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index a2c202158522f..31f0c29d4c8b7 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5229,6 +5229,18 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate { let Prototype = "T(unsigned int, T)"; } +def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def HLSLDdyCoarse : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_ddy_coarse"]; + let Attributes = [NoThrow, Const, CustomTypeChecking]; + let Prototype = "void(...)"; +} + // Builtins for XRay. def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 384bd59e7533a..8abc15b53d8c1 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -840,6 +840,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return EmitRuntimeCall( Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); } + case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("ddx_coarse operand must have a float representation"); + Intrinsic::ID ID = CGM.getHLSLRuntime().getDdxCoarseIntrinsic(); + return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID, + ArrayRef{Op0}, nullptr, + "hlsl.ddx.coarse"); + } + case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + if (!E->getArg(0)->getType()->hasFloatingRepresentation()) + llvm_unreachable("ddy_coarse operand must have a float representation"); + Intrinsic::ID ID = CGM.getHLSLRuntime().getDdyCoarseIntrinsic(); + return Builder.CreateIntrinsic(/*ReturnType=*/Op0->getType(), ID, + ArrayRef{Op0}, nullptr, + "hlsl.ddy.coarse"); + } case Builtin::BI__builtin_get_spirv_spec_constant_bool: case Builtin::BI__builtin_get_spirv_spec_constant_short: case Builtin::BI__builtin_get_spirv_spec_constant_ushort: diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index d35df524fdc84..a13022cae9d37 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -136,6 +136,8 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, group_memory_barrier_with_group_sync) GENERATE_HLSL_INTRINSIC_FUNCTION(GetDimensionsX, resource_getdimensions_x) + GENERATE_HLSL_INTRINSIC_FUNCTION(DdxCoarse, ddx_coarse) + GENERATE_HLSL_INTRINSIC_FUNCTION(DdyCoarse, ddy_coarse) //===----------------------------------------------------------------------===// // End of reserved area for HLSL intrinsic getters. diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index d973371312701..cd54547a722d5 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -2840,5 +2840,73 @@ float4 radians(float4); _HLSL_BUILTIN_ALIAS(__builtin_hlsl_group_memory_barrier_with_group_sync) __attribute__((convergent)) void GroupMemoryBarrierWithGroupSync(void); +//===----------------------------------------------------------------------===// +// ddx_coarse builtin +//===----------------------------------------------------------------------===// + +/// \fn T ddx_coarse(T value) +/// \brief Computes a low precision partial derivative with respect to the +/// screen-space x-coordinate. +/// \param value The input value. +/// +/// The return value is a floating point scalar or vector containing the low +/// prevision partial derivative of the input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half ddx_coarse(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half2 ddx_coarse(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half3 ddx_coarse(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +half4 ddx_coarse(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float ddx_coarse(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float2 ddx_coarse(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float3 ddx_coarse(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddx_coarse) +float4 ddx_coarse(float4); + +//===----------------------------------------------------------------------===// +// ddy_coarse builtin +//===----------------------------------------------------------------------===// + +/// \fn T ddy_coarse(T value) +/// \brief Computes a low precision partial derivative with respect to the +/// screen-space y-coordinate. +/// \param value The input value. +/// +/// The return value is a floating point scalar or vector containing the low +/// prevision partial derivative of the input value. + +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half ddy_coarse(half); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half2 ddy_coarse(half2); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half3 ddy_coarse(half3); +_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +half4 ddy_coarse(half4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float ddy_coarse(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float2 ddy_coarse(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float3 ddy_coarse(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_ddy_coarse) +float4 ddy_coarse(float4); + } // namespace hlsl #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_ diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 2a485da06908d..b717f6706068f 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3202,7 +3202,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case Builtin::BI__builtin_hlsl_elementwise_degrees: case Builtin::BI__builtin_hlsl_elementwise_radians: case Builtin::BI__builtin_hlsl_elementwise_rsqrt: - case Builtin::BI__builtin_hlsl_elementwise_frac: { + case Builtin::BI__builtin_hlsl_elementwise_frac: + case Builtin::BI__builtin_hlsl_elementwise_ddx_coarse: + case Builtin::BI__builtin_hlsl_elementwise_ddy_coarse: { if (SemaRef.checkArgCount(TheCall, 1)) return true; if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl new file mode 100644 index 0000000000000..01216eefadba2 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse-builtin.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh +// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddx.coarse +half test_f16_ddx_coarse(half val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef +// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddx.coarse +float test_f32_ddx_coarse(float val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl new file mode 100644 index 0000000000000..c200d4715629e --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddx-coarse.hlsl @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddx_coarseDh +// CHECK: %hlsl.ddx.coarse = call {{.*}} half @llvm.dx.ddx.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddx_coarseDh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} half @llvm.spv.ddx.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddx.coarse +half test_f16_ddx_coarse(half val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.dx.ddx.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK: ret <2 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddx_coarse2Dv2_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x half> @llvm.spv.ddx.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK-SPIRV: ret <2 x half> %hlsl.ddx.coarse +half2 test_f16_ddx_coarse2(half2 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.dx.ddx.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK: ret <3 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddx_coarse3Dv3_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x half> @llvm.spv.ddx.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK-SPIRV: ret <3 x half> %hlsl.ddx.coarse +half3 test_f16_ddx_coarse3(half3 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh +// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.dx.ddx.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK: ret <4 x half> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddx_coarse4Dv4_Dh +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK-SPIRV: ret <4 x half> %hlsl.ddx.coarse +half4 test_f16_ddx_coarse4(half4 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddx_coarsef +// CHECK: %hlsl.ddx.coarse = call {{.*}} float @llvm.dx.ddx.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddx_coarsef +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} float @llvm.spv.ddx.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddx.coarse +float test_f32_ddx_coarse(float val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.dx.ddx.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK: ret <2 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddx_coarse2Dv2_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <2 x float> @llvm.spv.ddx.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK-SPIRV: ret <2 x float> %hlsl.ddx.coarse +float2 test_f32_ddx_coarse2(float2 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.dx.ddx.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK: ret <3 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddx_coarse3Dv3_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <3 x float> @llvm.spv.ddx.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK-SPIRV: ret <3 x float> %hlsl.ddx.coarse +float3 test_f32_ddx_coarse3(float3 val) { + return ddx_coarse(val); +} + +// CHECK-LABEL: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f +// CHECK: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK: ret <4 x float> %hlsl.ddx.coarse +// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddx_coarse4Dv4_f +// CHECK-SPIRV: %hlsl.ddx.coarse = call {{.*}} <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK-SPIRV: ret <4 x float> %hlsl.ddx.coarse +float4 test_f32_ddx_coarse4(float4 val) { + return ddx_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl new file mode 100644 index 0000000000000..2967deb75031f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse-builtin.hlsl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh +// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddy_coarseDh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddy.coarse +half test_f16_ddy_coarse(half val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef +// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddy_coarsef +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddy.coarse +float test_f32_ddy_coarse(float val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); +} diff --git a/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl new file mode 100644 index 0000000000000..faa972a1be326 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/ddy-coarse.hlsl @@ -0,0 +1,86 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-pc-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -fnative-half-type -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK-SPIRV + +// CHECK-LABEL: half @_Z19test_f16_ddy_coarseDh +// CHECK: %hlsl.ddy.coarse = call {{.*}} half @llvm.dx.ddy.coarse.f16(half %{{.*}}) +// CHECK: ret half %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: half @_Z19test_f16_ddy_coarseDh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} half @llvm.spv.ddy.coarse.f16(half %{{.*}}) +// CHECK-SPIRV: ret half %hlsl.ddy.coarse +half test_f16_ddy_coarse(half val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.dx.ddy.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK: ret <2 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <2 x half> @_Z20test_f16_ddy_coarse2Dv2_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x half> @llvm.spv.ddy.coarse.v2f16(<2 x half> %{{.*}}) +// CHECK-SPIRV: ret <2 x half> %hlsl.ddy.coarse +half2 test_f16_ddy_coarse2(half2 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.dx.ddy.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK: ret <3 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <3 x half> @_Z20test_f16_ddy_coarse3Dv3_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x half> @llvm.spv.ddy.coarse.v3f16(<3 x half> %{{.*}}) +// CHECK-SPIRV: ret <3 x half> %hlsl.ddy.coarse +half3 test_f16_ddy_coarse3(half3 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh +// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.dx.ddy.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK: ret <4 x half> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <4 x half> @_Z20test_f16_ddy_coarse4Dv4_Dh +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %{{.*}}) +// CHECK-SPIRV: ret <4 x half> %hlsl.ddy.coarse +half4 test_f16_ddy_coarse4(half4 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: float @_Z19test_f32_ddy_coarsef +// CHECK: %hlsl.ddy.coarse = call {{.*}} float @llvm.dx.ddy.coarse.f32(float %{{.*}}) +// CHECK: ret float %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: float @_Z19test_f32_ddy_coarsef +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} float @llvm.spv.ddy.coarse.f32(float %{{.*}}) +// CHECK-SPIRV: ret float %hlsl.ddy.coarse +float test_f32_ddy_coarse(float val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.dx.ddy.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK: ret <2 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <2 x float> @_Z20test_f32_ddy_coarse2Dv2_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <2 x float> @llvm.spv.ddy.coarse.v2f32(<2 x float> %{{.*}}) +// CHECK-SPIRV: ret <2 x float> %hlsl.ddy.coarse +float2 test_f32_ddy_coarse2(float2 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.dx.ddy.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK: ret <3 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <3 x float> @_Z20test_f32_ddy_coarse3Dv3_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <3 x float> @llvm.spv.ddy.coarse.v3f32(<3 x float> %{{.*}}) +// CHECK-SPIRV: ret <3 x float> %hlsl.ddy.coarse +float3 test_f32_ddy_coarse3(float3 val) { + return ddy_coarse(val); +} + +// CHECK-LABEL: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f +// CHECK: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK: ret <4 x float> %hlsl.ddy.coarse +// CHECK-LABEL-SPIRV: <4 x float> @_Z20test_f32_ddy_coarse4Dv4_f +// CHECK-SPIRV: %hlsl.ddy.coarse = call {{.*}} <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %{{.*}}) +// CHECK-SPIRV: ret <4 x float> %hlsl.ddy.coarse +float4 test_f32_ddy_coarse4(float4 val) { + return ddy_coarse(val); +} diff --git a/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl new file mode 100644 index 0000000000000..6bb7d5c0f9a2c --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/ddx-coarse-errors.hlsl @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify + +typedef float float2 __attribute__((ext_vector_type(2))); + +float no_arg() { + return __builtin_hlsl_elementwise_ddx_coarse(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float too_many_args(float val) { + return __builtin_hlsl_elementwise_ddx_coarse(val, val); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_integer_scalar_input(int val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}} +} + +double test_double_scalar_input(double val) { + return __builtin_hlsl_elementwise_ddx_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl new file mode 100644 index 0000000000000..fe01ec0f4c4b0 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/ddy-coarse-errors.hlsl @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify + +float no_arg() { + return __builtin_hlsl_elementwise_ddy_coarse(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} +} + +float too_many_args(float val) { + return __builtin_hlsl_elementwise_ddy_coarse(val, val); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} +} + +float test_integer_scalar_input(int val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'int')}} +} + +double test_double_scalar_input(double val) { + return __builtin_hlsl_elementwise_ddy_coarse(val); + // expected-error@-1 {{1st argument must be a scalar or vector of 16 or 32 bit floating-point types (was 'double')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 3b7077c52db21..111f3e1754901 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -165,6 +165,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0> [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>; +def int_dx_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; +def int_dx_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 49a182be98acd..7865d473d2efc 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -132,6 +132,8 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>; + def int_spv_ddx_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_spv_ddy_coarse : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 44c48305f2832..95b9bc4078fbd 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -930,6 +930,24 @@ def Discard : DXILOp<82, discard> { let stages = [Stages]; } +def DerivCoarseX : DXILOp<83, unary> { + let Doc = "computes the rate of change per stamp in x direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + +def DerivCoarseY : DXILOp<84, unary> { + let Doc = "computes the rate of change per stamp in y direction"; + let intrinsics = [IntrinSelect]; + let arguments = [OverloadTy]; + let result = OverloadTy; + let overloads = [Overloads]; + let stages = [Stages]; +} + def ThreadId : DXILOp<93, threadId> { let Doc = "Reads the thread ID"; let intrinsics = [IntrinSelect]; diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 68fd3e0bc74c7..7c5116abb7276 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -60,6 +60,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable( case Intrinsic::dx_wave_reduce_usum: case Intrinsic::dx_imad: case Intrinsic::dx_umad: + case Intrinsic::dx_ddx_coarse: + case Intrinsic::dx_ddy_coarse: return true; default: return false; diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 6181abb281cc6..33151a8bb825a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -1697,11 +1697,16 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSPIRVType(unsigned BitWidth, MachineIRBuilder MIRBuilder(DepMBB, DepMBB.getFirstNonPHI()); const MachineInstr *NewMI = createOpType(MIRBuilder, [&](MachineIRBuilder &MIRBuilder) { - return BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(), - MIRBuilder.getDL(), TII.get(SPIRVOPcode)) - .addDef(createTypeVReg(CurMF->getRegInfo())) - .addImm(BitWidth) - .addImm(0); + auto NewTypeMI = BuildMI(MIRBuilder.getMBB(), *MIRBuilder.getInsertPt(), + MIRBuilder.getDL(), TII.get(SPIRVOPcode)) + .addDef(createTypeVReg(CurMF->getRegInfo())) + .addImm(BitWidth); + // Don't add Encoding to FP type + if (!Ty->isFloatTy()) { + return NewTypeMI.addImm(0); + } else { + return NewTypeMI; + } }); add(Ty, false, NewMI); return finishCreatingSPIRVType(Ty, NewMI); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 021353ab716f7..554d3e6b75a69 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -325,6 +325,8 @@ class SPIRVInstructionSelector : public InstructionSelector { MachineInstr &I) const; bool selectFrexp(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const; + bool selectDpdCoarse(Register ResVReg, const SPIRVType *ResType, + MachineInstr &I, const unsigned DPdOpCode) const; // Utilities std::pair buildI32Constant(uint32_t Val, MachineInstr &I, @@ -3099,6 +3101,59 @@ bool SPIRVInstructionSelector::wrapIntoSpecConstantOp( return Result; } +bool SPIRVInstructionSelector::selectDpdCoarse(Register ResVReg, + const SPIRVType *ResType, + MachineInstr &I, + const unsigned DPdOpCode) const { + // If the arg/result types are half then we need to wrap the instr in + // conversions to float + // This case occurs because a half arg/result is legal in HLSL but not spirv. + Register SrcReg = I.getOperand(2).getReg(); + SPIRVType *SrcType = GR.getSPIRVTypeForVReg(SrcReg); + unsigned BitWidth = std::min(GR.getScalarOrVectorBitWidth(SrcType), + GR.getScalarOrVectorBitWidth(ResType)); + if (BitWidth == 32) { + return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(I.getOperand(2).getReg()); + } else { + MachineIRBuilder MIRBuilder(I); + unsigned componentCount = GR.getScalarOrVectorComponentCount(SrcType); + SPIRVType *Float32Ty = GR.getOrCreateSPIRVFloatType(32, I, TII); + SPIRVType *F32ConvertTy; + if (componentCount == 1) { + F32ConvertTy = Float32Ty; + } else { + F32ConvertTy = GR.getOrCreateSPIRVVectorType(Float32Ty, componentCount, + MIRBuilder, false); + } + + const TargetRegisterClass *RegClass = GR.getRegClass(SrcType); + Register ConvertToVReg = MRI->createVirtualRegister(RegClass); + Register DpdOpVReg = MRI->createVirtualRegister(RegClass); + + bool Result = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert)) + .addDef(ConvertToVReg) + .addUse(GR.getSPIRVTypeID(F32ConvertTy)) + .addUse(SrcReg) + .constrainAllUses(TII, TRI, RBI); + Result &= BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(DPdOpCode)) + .addDef(DpdOpVReg) + .addUse(GR.getSPIRVTypeID(F32ConvertTy)) + .addUse(ConvertToVReg) + .constrainAllUses(TII, TRI, RBI); + Result &= + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFConvert)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(DpdOpVReg) + .constrainAllUses(TII, TRI, RBI); + return Result; + } +} + bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { @@ -3472,6 +3527,12 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_resource_nonuniformindex: { return selectResourceNonUniformIndex(ResVReg, ResType, I); } + case Intrinsic::spv_ddx_coarse: { + return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdxCoarse); + } + case Intrinsic::spv_ddy_coarse: { + return selectDpdCoarse(ResVReg, ResType, I, SPIRV::OpDPdyCoarse); + } default: { std::string DiagMsg; raw_string_ostream OS(DiagMsg); diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index f7cdfcb65623b..63b77321a0307 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -934,7 +934,8 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan( Capability::UniformBufferArrayDynamicIndexing, Capability::SampledImageArrayDynamicIndexing, Capability::StorageBufferArrayDynamicIndexing, - Capability::StorageImageArrayDynamicIndexing}); + Capability::StorageImageArrayDynamicIndexing, + Capability::DerivativeControl}); // Became core in Vulkan 1.2 if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) { @@ -2061,6 +2062,11 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::PredicatedIOINTEL); break; } + case SPIRV::OpDPdxCoarse: + case SPIRV::OpDPdyCoarse: { + Reqs.addCapability(SPIRV::Capability::DerivativeControl); + break; + } default: break; diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll new file mode 100644 index 0000000000000..2cf0d1db27efe --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddx_coarse-errors.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation ddx.coarse does not support double overload type +; CHECK: in function ddx.coarse +; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @ddx.coarse_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.ddx.coarse = call double @llvm.dx.ddx.coarse.f64(double %0) + ret double %dx.ddx.coarse +} \ No newline at end of file diff --git a/llvm/test/CodeGen/DirectX/ddx_coarse.ll b/llvm/test/CodeGen/DirectX/ddx_coarse.ll new file mode 100644 index 0000000000000..986f7a57fd5ea --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddx_coarse.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for ddx_coarse are generated for half/float and matching vectors + +define noundef half @deriv_coarse_x_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}}) +entry: + %dx.ddx.coarse = call half @llvm.dx.ddx.coarse.f16(half %a) + ret half %dx.ddx.coarse +} + +define noundef float @deriv_coarse_x_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}}) +entry: + %dx.ddx.coarse = call float @llvm.dx.ddx.coarse.f32(float %a) + ret float %dx.ddx.coarse +} + +define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.ddx.coarse = call <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float> %a) + ret <4 x float> %dx.ddx.coarse +} + +declare half @llvm.dx.ddx.coarse.f16(half) +declare float @llvm.dx.ddx.coarse.f32(float) +declare <4 x float> @llvm.dx.ddx.coarse.v4f32(<4 x float>) + diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll new file mode 100644 index 0000000000000..400969f21713c --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddy_coarse-errors.ll @@ -0,0 +1,15 @@ +; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s + +; DXIL operation ddy.coarse does not support double overload type +; CHECK: in function ddy.coarse +; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type + +; Function Attrs: noinline nounwind optnone +define noundef double @ddy.coarse_double(double noundef %a) #0 { +entry: + %a.addr = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + %0 = load double, ptr %a.addr, align 8 + %dx.ddy.coarse = call double @llvm.dx.ddy.coarse.f64(double %0) + ret double %dx.ddy.coarse +} \ No newline at end of file diff --git a/llvm/test/CodeGen/DirectX/ddy_coarse.ll b/llvm/test/CodeGen/DirectX/ddy_coarse.ll new file mode 100644 index 0000000000000..24c6d6c9d745a --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ddy_coarse.ll @@ -0,0 +1,41 @@ +; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +; Make sure dxil operation function calls for ddy_coarse are generated for half/float and matching vectors + +define noundef half @deriv_coarse_y_half(half noundef %a) { +; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}}) +entry: + %dx.ddy.coarse = call half @llvm.dx.ddy.coarse.f16(half %a) + ret half %dx.ddy.coarse +} + +define noundef float @deriv_coarse_y_float(float noundef %a) { +; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}}) +entry: + %dx.ddy.coarse = call float @llvm.dx.ddy.coarse.f32(float %a) + ret float %dx.ddy.coarse +} + +define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) { +; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 +; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]]) +; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 +; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]]) +; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2 +; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]]) +; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3 +; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]]) +; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2 +; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3 +; CHECK: ret <4 x float> %{{.*}} +entry: + %dx.ddy.coarse = call <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float> %a) + ret <4 x float> %dx.ddy.coarse +} + +declare half @llvm.dx.ddy.coarse.f16(half) +declare float @llvm.dx.ddy.coarse.f32(float) +declare <4 x float> @llvm.dx.ddy.coarse.v4f32(<4 x float>) + diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll new file mode 100644 index 0000000000000..1d88dac855bf5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddx_coarse.ll @@ -0,0 +1,47 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @ddx_coarse_float(float noundef %a) { +entry: +; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpDPdxCoarse %[[#float_32]] %[[#float_32_arg]] + %elt.ddx.coarse = call float @llvm.spv.ddx.coarse.f32(float %a) + ret float %elt.ddx.coarse +} + +define noundef half @ddx_coarse_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#float_32:]] %[[#float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]] + %elt.ddx.coarse = call half @llvm.spv.ddx.coarse.f16(half %a) + ret half %elt.ddx.coarse +} + +define noundef <4 x float> @ddx_coarse_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.ddx.coarse = call <4 x float> @llvm.spv.ddx.coarse.v4f32(<4 x float> %a) + ret <4 x float> %elt.ddx.coarse +} + +define noundef <4 x half> @ddx_coarse_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32:]] %[[#vec4_float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdxCoarse %[[#vec4_float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]] + %elt.ddx.coarse = call <4 x half> @llvm.spv.ddx.coarse.v4f16(<4 x half> %a) + ret <4 x half> %elt.ddx.coarse +} + +declare float @llvm.spv.ddx.coarse.f32(float) +declare half @llvm.spv.ddx.coarse.f16(half) \ No newline at end of file diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll new file mode 100644 index 0000000000000..68105e03bd02a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ddy_coarse.ll @@ -0,0 +1,47 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 + +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 + +define noundef float @ddy_coarse_float(float noundef %a) { +entry: +; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]] +; CHECK: %[[#]] = OpDPdyCoarse %[[#float_32]] %[[#float_32_arg]] + %elt.ddy.coarse = call float @llvm.spv.ddy.coarse.f32(float %a) + ret float %elt.ddy.coarse +} + +define noundef half @ddy_coarse_half(half noundef %a) { +entry: +; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#float_32:]] %[[#float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#float_16]] %[[#coarse]] + %elt.ddy.coarse = call half @llvm.spv.ddy.coarse.f16(half %a) + ret half %elt.ddy.coarse +} + +define noundef <4 x float> @ddy_coarse_float_vector(<4 x float> noundef %a) { +entry: +; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]] +; CHECK: %[[#]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#vec4_float_32_arg]] + %elt.ddy.coarse = call <4 x float> @llvm.spv.ddy.coarse.v4f32(<4 x float> %a) + ret <4 x float> %elt.ddy.coarse +} + +define noundef <4 x half> @ddy_coarse_half_vector(<4 x half> noundef %a) { +entry: +; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]] +; CHECK: %[[#converted:]] = OpFConvert %[[#vec4_float_32:]] %[[#vec4_float_16_arg]] +; CHECK: %[[#coarse:]] = OpDPdyCoarse %[[#vec4_float_32]] %[[#converted]] +; CHECK: %[[#]] = OpFConvert %[[#vec4_float_16]] %[[#coarse]] + %elt.ddy.coarse = call <4 x half> @llvm.spv.ddy.coarse.v4f16(<4 x half> %a) + ret <4 x half> %elt.ddy.coarse +} + +declare float @llvm.spv.ddy.coarse.f32(float) +declare half @llvm.spv.ddy.coarse.f16(half) \ No newline at end of file