Skip to content

Commit 15620e5

Browse files
committed
Replace the loop-unrolling length algorithm with one that does not need to unroll, to fix a test break.
1 parent 7173a48 commit 15620e5

File tree

3 files changed

+42
-60
lines changed

3 files changed

+42
-60
lines changed

clang/lib/Headers/hlsl/hlsl_detail.h

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,9 @@ length_impl(T X) {
4848
}
4949

5050
template <typename T, int N>
51-
enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
51+
constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
5252
length_vec_impl(vector<T, N> X) {
53-
vector<T, N> XSquared = X * X;
54-
T XSquaredSum = XSquared[0];
55-
[unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i];
56-
return __builtin_elementwise_sqrt(XSquaredSum);
53+
return __builtin_elementwise_sqrt(__builtin_hlsl_dot(X, X));
5754
}
5855

5956
} // namespace __detail

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1301,7 +1301,7 @@ _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
13011301
const inline half length(half X) { return __detail::length_impl(X); }
13021302
const inline float length(float X) { return __detail::length_impl(X); }
13031303

1304-
template <int N>
1304+
template <int N>
13051305
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
13061306
const inline half length(vector<half, N> X) {
13071307
return __detail::length_vec_impl(X);

clang/test/CodeGenHLSL/builtins/length.hlsl

Lines changed: 39 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1,72 +1,70 @@
1-
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
21
// RUN: %clang_cc1 -finclude-default-header -triple \
32
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
4-
// RUN: -emit-llvm -O1 -o - | FileCheck %s
3+
// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,DXCHECK \
4+
// RUN: -DTARGET=dx
55

6-
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh(
6+
// RUN: %clang_cc1 -finclude-default-header -triple \
7+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
8+
// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,SPVCHECK \
9+
// RUN: -DTARGET=spv
10+
11+
12+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z16test_length_halfDh(
13+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z16test_length_halfDh(
714
// CHECK-SAME: half noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
815
// CHECK-NEXT: [[ENTRY:.*:]]
916
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.fabs.f16(half [[P0]])
1017
// CHECK-NEXT: ret half [[ELT_ABS_I]]
1118
//
19+
1220
half test_length_half(half p0)
1321
{
1422
return length(p0);
1523
}
1624

17-
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh(
25+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh(
26+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half2Dv2_Dh(
1827
// CHECK-SAME: <2 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
1928
// CHECK-NEXT: [[ENTRY:.*:]]
20-
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x half> [[P0]], [[P0]]
21-
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
22-
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
23-
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT_I]], [[VECEXT1_I]]
24-
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I]])
29+
// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].fdot.v2f16(<2 x half> [[P0]], <2 x half> [[P0]])
30+
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[HLSL_DOT_I]])
2531
// CHECK-NEXT: ret half [[TMP0]]
2632
//
33+
34+
2735
half test_length_half2(half2 p0)
2836
{
2937
return length(p0);
3038
}
3139

32-
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh(
40+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh(
41+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half3Dv3_Dh(
3342
// CHECK-SAME: <3 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
3443
// CHECK-NEXT: [[ENTRY:.*:]]
35-
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x half> [[P0]], [[P0]]
36-
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
37-
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
38-
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
39-
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
40-
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
41-
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
44+
// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].fdot.v3f16(<3 x half> [[P0]], <3 x half> [[P0]])
45+
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[HLSL_DOT_I]])
4246
// CHECK-NEXT: ret half [[TMP0]]
4347
//
4448
half test_length_half3(half3 p0)
4549
{
4650
return length(p0);
4751
}
4852

49-
// CHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh(
53+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh(
54+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) half @_Z17test_length_half4Dv4_Dh(
5055
// CHECK-SAME: <4 x half> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
5156
// CHECK-NEXT: [[ENTRY:.*:]]
52-
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x half> [[P0]], [[P0]]
53-
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
54-
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
55-
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I]], [[VECEXT_I]]
56-
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
57-
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_1]], [[ADD_I]]
58-
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
59-
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn half [[VECEXT1_I_2]], [[ADD_I_1]]
60-
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
57+
// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn half @llvm.[[TARGET]].fdot.v4f16(<4 x half> [[P0]], <4 x half> [[P0]])
58+
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef half @llvm.sqrt.f16(half [[HLSL_DOT_I]])
6159
// CHECK-NEXT: ret half [[TMP0]]
6260
//
6361
half test_length_half4(half4 p0)
6462
{
6563
return length(p0);
6664
}
6765

68-
69-
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf(
66+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z17test_length_floatf(
67+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z17test_length_floatf(
7068
// CHECK-SAME: float noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
7169
// CHECK-NEXT: [[ENTRY:.*:]]
7270
// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.fabs.f32(float [[P0]])
@@ -77,51 +75,38 @@ float test_length_float(float p0)
7775
return length(p0);
7876
}
7977

80-
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f(
78+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f(
79+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float2Dv2_f(
8180
// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
8281
// CHECK-NEXT: [[ENTRY:.*:]]
83-
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <2 x float> [[P0]], [[P0]]
84-
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
85-
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
86-
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT_I]], [[VECEXT1_I]]
87-
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I]])
82+
// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].fdot.v2f32(<2 x float> [[P0]], <2 x float> [[P0]])
83+
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[HLSL_DOT_I]])
8884
// CHECK-NEXT: ret float [[TMP0]]
8985
//
9086
float test_length_float2(float2 p0)
9187
{
9288
return length(p0);
9389
}
9490

95-
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f(
91+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f(
92+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float3Dv3_f(
9693
// CHECK-SAME: <3 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
9794
// CHECK-NEXT: [[ENTRY:.*:]]
98-
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <3 x float> [[P0]], [[P0]]
99-
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
100-
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
101-
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
102-
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
103-
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
104-
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
95+
// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].fdot.v3f32(<3 x float> [[P0]], <3 x float> [[P0]])
96+
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[HLSL_DOT_I]])
10597
// CHECK-NEXT: ret float [[TMP0]]
10698
//
10799
float test_length_float3(float3 p0)
108100
{
109101
return length(p0);
110102
}
111103

112-
113-
// CHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f(
104+
// SPVCHECK-LABEL: define spir_func noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f(
105+
// DXCHECK-LABEL: define noundef nofpclass(nan inf) float @_Z18test_length_float4Dv4_f(
114106
// CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
115107
// CHECK-NEXT: [[ENTRY:.*:]]
116-
// CHECK-NEXT: [[MUL_I:%.*]] = fmul reassoc nnan ninf nsz arcp afn <4 x float> [[P0]], [[P0]]
117-
// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
118-
// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
119-
// CHECK-NEXT: [[ADD_I:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I]], [[VECEXT_I]]
120-
// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
121-
// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_1]], [[ADD_I]]
122-
// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
123-
// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd reassoc nnan ninf nsz arcp afn float [[VECEXT1_I_2]], [[ADD_I_1]]
124-
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
108+
// CHECK-NEXT: [[HLSL_DOT_I:%.*]] = tail call reassoc nnan ninf nsz arcp afn float @llvm.[[TARGET]].fdot.v4f32(<4 x float> [[P0]], <4 x float> [[P0]])
109+
// CHECK-NEXT: [[TMP0:%.*]] = tail call reassoc nnan ninf nsz arcp afn noundef float @llvm.sqrt.f32(float [[HLSL_DOT_I]])
125110
// CHECK-NEXT: ret float [[TMP0]]
126111
//
127112
float test_length_float4(float4 p0)

0 commit comments

Comments
 (0)