-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[HLSL] Move length support out of the DirectX Backend #121611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,14 @@ namespace hlsl { | |
|
||
namespace __detail { | ||
|
||
// Type trait, primary template: used when T and U are not matched by the
// <T, T> specialization, so `value` is false.
template <typename T, typename U> struct is_same { | ||
static const bool value = false; | ||
}; | ||
|
||
// Partial specialization selected when both type arguments are the same type;
// `value` is true.
template <typename T> struct is_same<T, T> { | ||
static const bool value = true; | ||
}; | ||
|
||
// Primary template with an empty body; a separate <true, T> specialization
// follows.
template <bool B, typename T> struct enable_if {}; | ||
|
||
template <typename T> struct enable_if<true, T> { | ||
|
@@ -33,6 +41,21 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) { | |
return __builtin_bit_cast(U, F); | ||
} | ||
|
||
// Scalar length helper. The enable_if_t constraint admits only T == float or
// T == half. Returns the absolute value of X, computed with
// __builtin_elementwise_abs.
template <typename T> | ||
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T> | ||
length_impl(T X) { | ||
return __builtin_elementwise_abs(X); | ||
} | ||
|
||
// Vector length helper for vector<T, N>. The enable_if_t constraint admits
// only T == float or T == half. Returns a scalar T: the square root of the sum
// of the squared components of X.
template <typename T, int N> | ||
enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T> | ||
length_vec_impl(vector<T, N> X) { | ||
// Square every component with a single element-wise multiply.
vector<T, N> XSquared = X * X; | ||
// Seed the running sum with component 0 so the loop can start at index 1.
T XSquaredSum = XSquared[0]; | ||
[unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. clang-format doesn't seem to handle [unroll]
for (int i = 1; i < N; ++i)
XSquaredSum += XSquared[i]; or [unroll] for (int i = 1; i < N; ++i)
XSquaredSum += XSquared[i]; The latter is closer to what clang-format does with (aside: I'm not sure if we have a bug for clang-format for this yet; we should check) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Didn't see one; I can file one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is equivalent to a dot product. Might we consolidate optimizing the same operation by making use of that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. an earlier version essentially did |
||
return __builtin_elementwise_sqrt(XSquaredSum); | ||
} | ||
|
||
} // namespace __detail | ||
} // namespace hlsl | ||
#endif //_HLSL_HLSL_DETAILS_H_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,130 @@ | ||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ | ||
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ | ||
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ | ||
// RUN: --check-prefixes=CHECK,NATIVE_HALF | ||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ | ||
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ | ||
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you explain why you removed the NO_HALF variant? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I switched to have this file updated via |
||
// NATIVE_HALF: define noundef nofpclass(nan inf) half @ | ||
// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.fabs.f16(half | ||
// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float | ||
// NATIVE_HALF: ret half | ||
// NO_HALF: ret float | ||
// Scalar half case: calls length() on a single half value. The FileCheck lines
// preceding this function expect a call to an llvm.fabs intrinsic.
half test_length_half(half p0) | ||
{ | ||
return length(p0); | ||
} | ||
// NATIVE_HALF: define noundef nofpclass(nan inf) half @ | ||
// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v2f16 | ||
// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( | ||
// NATIVE_HALF: ret half %hlsl.length | ||
// NO_HALF: ret float %hlsl.length | ||
// half2 case: calls length() on a 2-component vector. The FileCheck lines
// preceding this function expect a call to an llvm.dx.length intrinsic.
half test_length_half2(half2 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// NATIVE_HALF: define noundef nofpclass(nan inf) half @ | ||
// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v3f16 | ||
// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( | ||
// NATIVE_HALF: ret half %hlsl.length | ||
// NO_HALF: ret float %hlsl.length | ||
// half3 case: calls length() on a 3-component vector. The FileCheck lines
// preceding this function expect a call to an llvm.dx.length intrinsic.
half test_length_half3(half3 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// NATIVE_HALF: define noundef nofpclass(nan inf) half @ | ||
// NATIVE_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn half @llvm.dx.length.v4f16 | ||
// NO_HALF: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( | ||
// NATIVE_HALF: ret half %hlsl.length | ||
// NO_HALF: ret float %hlsl.length | ||
// half4 case: calls length() on a 4-component vector. The FileCheck lines
// preceding this function expect a call to an llvm.dx.length intrinsic.
half test_length_half4(half4 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK: define noundef nofpclass(nan inf) float @ | ||
// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.fabs.f32(float | ||
// CHECK: ret float | ||
// Scalar float case: calls length() on a single float value. The FileCheck
// lines preceding this function expect a call to llvm.fabs.f32.
float test_length_float(float p0) | ||
{ | ||
return length(p0); | ||
} | ||
// CHECK: define noundef nofpclass(nan inf) float @ | ||
// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v2f32( | ||
// CHECK: ret float %hlsl.length | ||
// float2 case: calls length() on a 2-component vector. The FileCheck lines
// preceding this function expect a call to llvm.dx.length.v2f32.
float test_length_float2(float2 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// CHECK: define noundef nofpclass(nan inf) float @ | ||
// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v3f32( | ||
// CHECK: ret float %hlsl.length | ||
// float3 case: calls length() on a 3-component vector. The FileCheck lines
// preceding this function expect a call to llvm.dx.length.v3f32.
float test_length_float3(float3 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// CHECK: define noundef nofpclass(nan inf) float @ | ||
// CHECK: %hlsl.length = call reassoc nnan ninf nsz arcp afn float @llvm.dx.length.v4f32( | ||
// CHECK: ret float %hlsl.length | ||
// float4 case: calls length() on a 4-component vector. The FileCheck lines
// preceding this function expect a call to llvm.dx.length.v4f32.
float test_length_float4(float4 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 | ||
// RUN: %clang_cc1 -finclude-default-header -triple \ | ||
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ | ||
// RUN: -emit-llvm -O1 -o - | FileCheck %s | ||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh( | ||
// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]]) | ||
// CHECK-NEXT: ret half [[ELT_ABS_I]] | ||
// | ||
// Scalar half case: calls length() on a single half value. The autogenerated
// assertions preceding this function expect a single llvm.fabs.f16 call whose
// result is returned.
half test_length_half(half p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh( | ||
// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]] | ||
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0 | ||
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]]) | ||
// CHECK-NEXT: ret half [[TMP0]] | ||
// | ||
// half2 case: calls length() on a 2-component vector. The autogenerated
// assertions preceding this function expect a vector fmul of the argument with
// itself, one fadd of the two extracted elements, and an llvm.sqrt.f16 call.
half test_length_half2(half2 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh( | ||
// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]] | ||
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0 | ||
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]] | ||
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2 | ||
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]]) | ||
// CHECK-NEXT: ret half [[TMP0]] | ||
// | ||
// half3 case: calls length() on a 3-component vector. The autogenerated
// assertions preceding this function expect a vector fmul of the argument with
// itself, two fadds over the extracted elements, and an llvm.sqrt.f16 call.
half test_length_half3(half3 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh( | ||
// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]] | ||
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0 | ||
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]] | ||
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2 | ||
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]] | ||
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3 | ||
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]]) | ||
// CHECK-NEXT: ret half [[TMP0]] | ||
// | ||
// half4 case: calls length() on a 4-component vector. The autogenerated
// assertions preceding this function expect a vector fmul of the argument with
// itself, three fadds over the extracted elements, and an llvm.sqrt.f16 call.
half test_length_half4(half4 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf( | ||
// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]]) | ||
// CHECK-NEXT: ret float [[ELT_ABS_I]] | ||
// | ||
// Scalar float case: calls length() on a single float value. The autogenerated
// assertions preceding this function expect a single llvm.fabs.f32 call whose
// result is returned.
float test_length_float(float p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f( | ||
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]] | ||
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0 | ||
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]]) | ||
// CHECK-NEXT: ret float [[TMP0]] | ||
// | ||
// float2 case: calls length() on a 2-component vector. The autogenerated
// assertions preceding this function expect a vector fmul of the argument with
// itself, one fadd of the two extracted elements, and an llvm.sqrt.f32 call.
float test_length_float2(float2 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f( | ||
// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]] | ||
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0 | ||
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]] | ||
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2 | ||
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]]) | ||
// CHECK-NEXT: ret float [[TMP0]] | ||
// | ||
// float3 case: calls length() on a 3-component vector. The autogenerated
// assertions preceding this function expect a vector fmul of the argument with
// itself, two fadds over the extracted elements, and an llvm.sqrt.f32 call.
float test_length_float3(float3 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
|
||
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f( | ||
// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] { | ||
// CHECK-NEXT: [[ENTRY:.*:]] | ||
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]] | ||
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0 | ||
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1 | ||
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]] | ||
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2 | ||
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]] | ||
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3 | ||
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]] | ||
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]]) | ||
// CHECK-NEXT: ret float [[TMP0]] | ||
// | ||
// float4 case: calls length() on a 4-component vector. The autogenerated
// assertions preceding this function expect a vector fmul of the argument with
// itself, three fadds over the extracted elements, and an llvm.sqrt.f32 call.
float test_length_float4(float4 p0) | ||
{ | ||
return length(p0); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These seem fine, but we should stay aware of how much of standard C++ we're reimplementing in this header as we do more of this.