Skip to content

[HLSL] Move length support out of the DirectX Backend #121611

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4865,12 +4865,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

def HLSLLength : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_length"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_lerp"];
let Attributes = [NoThrow, Const];
Expand Down
14 changes: 0 additions & 14 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19346,20 +19346,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
}
case Builtin::BI__builtin_hlsl_length: {
Value *X = EmitScalarExpr(E->getArg(0));

assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
"length operand must have a float representation");
// if the operand is a scalar, we can use the fabs llvm intrinsic directly
if (!E->getArg(0)->getType()->isVectorType())
return EmitFAbs(*this, X);

return Builder.CreateIntrinsic(
/*ReturnType=*/X->getType()->getScalarType(),
CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
nullptr, "hlsl.length");
}
case Builtin::BI__builtin_hlsl_normalize: {
Value *X = EmitScalarExpr(E->getArg(0));

Expand Down
1 change: 0 additions & 1 deletion clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
Expand Down
23 changes: 23 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ namespace hlsl {

namespace __detail {

template <typename T, typename U> struct is_same {
static const bool value = false;
};

template <typename T> struct is_same<T, T> {
static const bool value = true;
};
Comment on lines +16 to +22
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These seem fine, but we should make sure we're being aware of how much of standard C++ we're reimplementing in this header as we do more of this stuff.


template <bool B, typename T> struct enable_if {};

template <typename T> struct enable_if<true, T> {
Expand All @@ -33,6 +41,21 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}

template <typename T>
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
length_impl(T X) {
return __builtin_elementwise_abs(X);
}

template <typename T, int N>
enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
length_vec_impl(vector<T, N> X) {
vector<T, N> XSquared = X * X;
T XSquaredSum = XSquared[0];
[unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clang-format doesn't seem to handle [unroll], but this should probably be one of:

  [unroll]
  for (int i = 1; i < N; ++i)
    XSquaredSum += XSquared[i];

or

  [unroll] for (int i = 1; i < N; ++i)
    XSquaredSum += XSquared[i];

The latter is closer to what clang-format does with [[unroll]] instead of [unroll] to trick it into seeing that as an attribute.

(aside: I'm not sure if we have a bug for clang-format for this yet, we should check)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't see one can file one.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is equivalent to a dot product. Might we consolidate optimizing the same operation by making us of that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

an earlier version essentially did sqrt(dot(x,x)); I was told not to do that because the implementation in DXC should be considered the spec compliant implementation.

return __builtin_elementwise_sqrt(XSquaredSum);
}

} // namespace __detail
} // namespace hlsl
#endif //_HLSL_HLSL_DETAILS_H_
29 changes: 9 additions & 20 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1297,27 +1297,16 @@ float4 lerp(float4, float4, float4);
///
/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).

_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
half length(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
half length(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
half length(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
half length(half4);
const inline half length(half X) { return __detail::length_impl(X); }
const inline float length(float X) { return __detail::length_impl(X); }

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
float length(float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
float length(float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
float length(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
float length(float4);
template <int N> const inline half length(vector<half, N> X) {
return __detail::length_vec_impl(X);
}

template <int N> const inline float length(vector<float, N> X) {
return __detail::length_vec_impl(X);
}

//===----------------------------------------------------------------------===//
// log builtins
Expand Down
18 changes: 0 additions & 18 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2100,24 +2100,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_length: {
if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
return true;
if (SemaRef.checkArgCount(TheCall, 1))
return true;

ExprResult A = TheCall->getArg(0);
QualType ArgTyA = A.get()->getType();
QualType RetTy;

if (auto *VTy = ArgTyA->getAs<VectorType>())
RetTy = VTy->getElementType();
else
RetTy = TheCall->getArg(0)->getType();

TheCall->setType(RetTy);
break;
}
case Builtin::BI__builtin_hlsl_mad: {
if (SemaRef.checkArgCount(TheCall, 3))
return true;
Expand Down
203 changes: 130 additions & 73 deletions clang/test/CodeGenHLSL/builtins/length.hlsl
Original file line number Diff line number Diff line change
@@ -1,73 +1,130 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you explain why you removed the NO_HALF variant?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I switched to have this file updated via update_cc_test_checks.py instead of manually written to save time writing tests. Keeping the NO_HALF variant was resulting in a lot of check duplication with very little benefit. We have plenty of other tests that check that a float will become a half without the fnative-half-type flag. I think we should probably update all the the tests to get ride of NO_HALF and just have one test that checks this flag is working as intended.

// NATIVE_HALF: define noundef nofpclass(nan inf) half @
// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half
// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
// NATIVE_HALF: ret half
// NO_HALF: ret float
half test_length_half(half p0)
{
return length(p0);
}
// NATIVE_HALF: define noundef nofpclass(nan inf) half @
// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16
// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
// NATIVE_HALF: ret half %hlsl.length
// NO_HALF: ret float %hlsl.length
half test_length_half2(half2 p0)
{
return length(p0);
}
// NATIVE_HALF: define noundef nofpclass(nan inf) half @
// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16
// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
// NATIVE_HALF: ret half %hlsl.length
// NO_HALF: ret float %hlsl.length
half test_length_half3(half3 p0)
{
return length(p0);
}
// NATIVE_HALF: define noundef nofpclass(nan inf) half @
// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16
// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
// NATIVE_HALF: ret half %hlsl.length
// NO_HALF: ret float %hlsl.length
half test_length_half4(half4 p0)
{
return length(p0);
}

// CHECK: define noundef nofpclass(nan inf) float @
// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float
// CHECK: ret float
float test_length_float(float p0)
{
return length(p0);
}
// CHECK: define noundef nofpclass(nan inf) float @
// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32(
// CHECK: ret float %hlsl.length
float test_length_float2(float2 p0)
{
return length(p0);
}
// CHECK: define noundef nofpclass(nan inf) float @
// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32(
// CHECK: ret float %hlsl.length
float test_length_float3(float3 p0)
{
return length(p0);
}
// CHECK: define noundef nofpclass(nan inf) float @
// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32(
// CHECK: ret float %hlsl.length
float test_length_float4(float4 p0)
{
return length(p0);
}
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -finclude-default-header -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -O1 -o - | FileCheck %s

// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh(
// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]])
// CHECK-NEXT: ret half [[ELT_ABS_I]]
//
half test_length_half(half p0)
{
return length(p0);
}

// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh(
// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]]
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]])
// CHECK-NEXT: ret half [[TMP0]]
//
half test_length_half2(half2 p0)
{
return length(p0);
}

// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh(
// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]]
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
// CHECK-NEXT: ret half [[TMP0]]
//
half test_length_half3(half3 p0)
{
return length(p0);
}

// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh(
// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]]
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
// CHECK-NEXT: ret half [[TMP0]]
//
half test_length_half4(half4 p0)
{
return length(p0);
}


// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf(
// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]])
// CHECK-NEXT: ret float [[ELT_ABS_I]]
//
float test_length_float(float p0)
{
return length(p0);
}

// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f(
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]]
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]])
// CHECK-NEXT: ret float [[TMP0]]
//
float test_length_float2(float2 p0)
{
return length(p0);
}

// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f(
// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]]
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
// CHECK-NEXT: ret float [[TMP0]]
//
float test_length_float3(float3 p0)
{
return length(p0);
}


// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f(
// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]]
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
// CHECK-NEXT: ret float [[TMP0]]
//
float test_length_float4(float4 p0)
{
return length(p0);
}
Loading
Loading