Skip to content

Commit fb81c1d

Browse files
committed
Add a small optimization to remove the redundant add of zero
1 parent 03b137a commit fb81c1d

File tree

4 files changed

+164
-99
lines changed

4 files changed

+164
-99
lines changed

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ class CGHLSLRuntime {
7777
GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
7878
GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
7979
GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
80-
GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
8180
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
8281
GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
8382
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)

clang/test/CodeGenHLSL/builtins/length.hlsl

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2-
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
3-
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
4-
// RUN: -emit-llvm -O1 -o - | FileCheck %s
2+
// RUN: %clang_cc1 -finclude-default-header -triple \
3+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
4+
// RUN: -emit-llvm -O1 -o - | FileCheck %s
55

66

77
// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
@@ -100,11 +100,6 @@ float test_length_float3(float3 p0)
100100
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[RDX_FADD_I]])
101101
// CHECK-NEXT: ret float [[TMP0]]
102102
//
103-
loat3 p0)
104-
{
105-
return length(p0);
106-
}
107-
108103
float test_length_float4(float4 p0)
109104
{
110105
return length(p0);

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "llvm/Pass.h"
2929
#include "llvm/Support/ErrorHandling.h"
3030
#include "llvm/Support/MathExtras.h"
31+
#include <cstdint>
3132

3233
#define DEBUG_TYPE "dxil-intrinsic-expansion"
3334

@@ -74,14 +75,15 @@ static bool isIntrinsicExpansion(Function &F) {
7475
}
7576

7677
static Value *expandVecReduceFAdd(CallInst *Orig) {
77-
Value *Sum = Orig->getOperand(0);
78+
// Note: the first argument of vector_reduce_fadd is a starting value.
79+
// Our use doesn't need it, so argument zero is ignored.
7880
Value *X = Orig->getOperand(1);
7981
IRBuilder<> Builder(Orig);
8082
Type *Ty = X->getType();
8183
auto *XVec = dyn_cast<FixedVectorType>(Ty);
8284
unsigned XVecSize = XVec->getNumElements();
83-
84-
for (unsigned I = 0; I < XVecSize; I++) {
85+
Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
86+
for (unsigned I = 1; I < XVecSize; I++) {
8587
Value *Elt = Builder.CreateExtractElement(X, I);
8688
Sum = Builder.CreateFAdd(Sum, Elt);
8789
}
@@ -96,8 +98,8 @@ static Value *expandVecReduceAdd(CallInst *Orig) {
9698
auto *XVec = dyn_cast<FixedVectorType>(Ty);
9799
unsigned XVecSize = XVec->getNumElements();
98100

99-
Value *Sum = ConstantInt::get(EltTy, 0);
100-
for (unsigned I = 0; I < XVecSize; I++) {
101+
Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
102+
for (unsigned I = 1; I < XVecSize; I++) {
101103
Value *Elt = Builder.CreateExtractElement(X, I);
102104
Sum = Builder.CreateAdd(Sum, Elt);
103105
}
Lines changed: 154 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,142 +1,211 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK
2-
; RUN: opt -S -dxil-intrinsic-expansion -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
3+
; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK
34

45
; Make sure dxil operation function calls for length are generated for half/float.
56

67
define noundef half @test_length_half2(<2 x half> noundef %p0) {
7-
; CHECK-LABEL: define noundef half @test_length_half2(
8-
; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
9-
; CHECK: [[ENTRY:.*:]]
10-
; CHECK: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
11-
; CHECK: [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
12-
; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
13-
; CHECK: [[TMP2:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
14-
; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
15-
; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP3]])
16-
; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP3]])
17-
; CHECK: ret half [[HLSL_LENGTH]]
8+
; EXPCHECK-LABEL: define noundef half @test_length_half2(
9+
; EXPCHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
10+
; EXPCHECK-NEXT: [[ENTRY:.*:]]
11+
; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
12+
; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
13+
; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
14+
; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
15+
; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP2]])
16+
; EXPCHECK-NEXT: ret half [[HLSL_LENGTH]]
17+
;
18+
; DOPCHECK-LABEL: define noundef half @test_length_half2(
19+
; DOPCHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
20+
; DOPCHECK-NEXT: [[ENTRY:.*:]]
21+
; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <2 x half> [[P0]], i64 0
22+
; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
23+
; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <2 x half> [[P0]], i64 1
24+
; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
25+
; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
26+
; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP0]])
27+
; DOPCHECK-NEXT: ret half [[HLSL_LENGTH1]]
1828
;
1929
entry:
20-
2130
%mul.i = fmul <2 x half> %p0, %p0
2231
%rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %mul.i)
2332
%hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
2433
ret half %hlsl.length
2534
}
2635

2736
define noundef half @test_length_half3(<3 x half> noundef %p0) {
28-
; CHECK-LABEL: define noundef half @test_length_half3(
29-
; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
30-
; CHECK: [[ENTRY:.*:]]
31-
; CHECK: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
32-
; CHECK: [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
33-
; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
34-
; CHECK: [[TMP2:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
35-
; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
36-
; CHECK: [[TMP4:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
37-
; CHECK: [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
38-
; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP5]])
39-
; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP5]])
40-
; CHECK: ret half [[HLSL_LENGTH]]
37+
; EXPCHECK-LABEL: define noundef half @test_length_half3(
38+
; EXPCHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
39+
; EXPCHECK-NEXT: [[ENTRY:.*:]]
40+
; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
41+
; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
42+
; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
43+
; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
44+
; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
45+
; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
46+
; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP4]])
47+
; EXPCHECK-NEXT: ret half [[HLSL_LENGTH]]
48+
;
49+
; DOPCHECK-LABEL: define noundef half @test_length_half3(
50+
; DOPCHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
51+
; DOPCHECK-NEXT: [[ENTRY:.*:]]
52+
; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <3 x half> [[P0]], i64 0
53+
; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
54+
; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <3 x half> [[P0]], i64 1
55+
; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
56+
; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <3 x half> [[P0]], i64 2
57+
; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul half [[P0_I2]], [[P0_I2]]
58+
; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
59+
; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], [[MUL_I_I2]]
60+
; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP1]])
61+
; DOPCHECK-NEXT: ret half [[HLSL_LENGTH1]]
4162
;
4263
entry:
43-
4464
%mul.i = fmul <3 x half> %p0, %p0
4565
%rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <3 x half> %mul.i)
4666
%hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
4767
ret half %hlsl.length
4868
}
4969

5070
define noundef half @test_length_half4(<4 x half> noundef %p0) {
51-
; CHECK-LABEL: define noundef half @test_length_half4(
52-
; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
53-
; CHECK: [[ENTRY:.*:]]
54-
; CHECK: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
55-
; CHECK: [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
56-
; CHECK: [[TMP1:%.*]] = fadd half 0xH0000, [[TMP0]]
57-
; CHECK: [[TMP2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
58-
; CHECK: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
59-
; CHECK: [[TMP4:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
60-
; CHECK: [[TMP5:%.*]] = fadd half [[TMP3]], [[TMP4]]
61-
; CHECK: [[TMP6:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
62-
; CHECK: [[TMP7:%.*]] = fadd half [[TMP5]], [[TMP6]]
63-
; EXPCHECK: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP7]])
64-
; DOPCHECK: [[HLSL_LENGTH:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP7]])
65-
; CHECK: ret half [[HLSL_LENGTH]]
71+
; EXPCHECK-LABEL: define noundef half @test_length_half4(
72+
; EXPCHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
73+
; EXPCHECK-NEXT: [[ENTRY:.*:]]
74+
; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
75+
; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
76+
; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
77+
; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
78+
; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
79+
; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
80+
; EXPCHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
81+
; EXPCHECK-NEXT: [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
82+
; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call half @llvm.sqrt.f16(half [[TMP6]])
83+
; EXPCHECK-NEXT: ret half [[HLSL_LENGTH]]
84+
;
85+
; DOPCHECK-LABEL: define noundef half @test_length_half4(
86+
; DOPCHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
87+
; DOPCHECK-NEXT: [[ENTRY:.*:]]
88+
; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <4 x half> [[P0]], i64 0
89+
; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul half [[P0_I0]], [[P0_I0]]
90+
; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <4 x half> [[P0]], i64 1
91+
; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul half [[P0_I1]], [[P0_I1]]
92+
; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <4 x half> [[P0]], i64 2
93+
; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul half [[P0_I2]], [[P0_I2]]
94+
; DOPCHECK-NEXT: [[P0_I3:%.*]] = extractelement <4 x half> [[P0]], i64 3
95+
; DOPCHECK-NEXT: [[MUL_I_I3:%.*]] = fmul half [[P0_I3]], [[P0_I3]]
96+
; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd half [[MUL_I_I0]], [[MUL_I_I1]]
97+
; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], [[MUL_I_I2]]
98+
; DOPCHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP1]], [[MUL_I_I3]]
99+
; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call half @dx.op.unary.f16(i32 24, half [[TMP2]])
100+
; DOPCHECK-NEXT: ret half [[HLSL_LENGTH1]]
66101
;
67102
entry:
68-
69103
%mul.i = fmul <4 x half> %p0, %p0
70104
%rdx.fadd.i = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <4 x half> %mul.i)
71105
%hlsl.length = call half @llvm.sqrt.f16(half %rdx.fadd.i)
72106
ret half %hlsl.length
73107
}
74108

75109
define noundef float @test_length_float2(<2 x float> noundef %p0) {
76-
; CHECK-LABEL: define noundef float @test_length_float2(
77-
; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
78-
; CHECK: [[ENTRY:.*:]]
79-
; CHECK: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
80-
; CHECK: [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
81-
; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
82-
; CHECK: [[TMP2:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
83-
; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
84-
; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP3]])
85-
; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP3]])
86-
; CHECK: ret float [[HLSL_LENGTH]]
110+
; EXPCHECK-LABEL: define noundef float @test_length_float2(
111+
; EXPCHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
112+
; EXPCHECK-NEXT: [[ENTRY:.*:]]
113+
; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
114+
; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
115+
; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
116+
; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
117+
; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP2]])
118+
; EXPCHECK-NEXT: ret float [[HLSL_LENGTH]]
119+
;
120+
; DOPCHECK-LABEL: define noundef float @test_length_float2(
121+
; DOPCHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
122+
; DOPCHECK-NEXT: [[ENTRY:.*:]]
123+
; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <2 x float> [[P0]], i64 0
124+
; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
125+
; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <2 x float> [[P0]], i64 1
126+
; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
127+
; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
128+
; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP0]])
129+
; DOPCHECK-NEXT: ret float [[HLSL_LENGTH1]]
87130
;
88131
entry:
89-
90132
%mul.i = fmul <2 x float> %p0, %p0
91133
%rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %mul.i)
92134
%hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
93135
ret float %hlsl.length
94136
}
95137

96138
define noundef float @test_length_float3(<3 x float> noundef %p0) {
97-
; CHECK-LABEL: define noundef float @test_length_float3(
98-
; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
99-
; CHECK: [[ENTRY:.*:]]
100-
; CHECK: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
101-
; CHECK: [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
102-
; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
103-
; CHECK: [[TMP2:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
104-
; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
105-
; CHECK: [[TMP4:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
106-
; CHECK: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
107-
; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP5]])
108-
; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP5]])
109-
; CHECK: ret float [[HLSL_LENGTH]]
139+
; EXPCHECK-LABEL: define noundef float @test_length_float3(
140+
; EXPCHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
141+
; EXPCHECK-NEXT: [[ENTRY:.*:]]
142+
; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
143+
; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
144+
; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
145+
; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
146+
; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
147+
; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
148+
; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP4]])
149+
; EXPCHECK-NEXT: ret float [[HLSL_LENGTH]]
150+
;
151+
; DOPCHECK-LABEL: define noundef float @test_length_float3(
152+
; DOPCHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
153+
; DOPCHECK-NEXT: [[ENTRY:.*:]]
154+
; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <3 x float> [[P0]], i64 0
155+
; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
156+
; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <3 x float> [[P0]], i64 1
157+
; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
158+
; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <3 x float> [[P0]], i64 2
159+
; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul float [[P0_I2]], [[P0_I2]]
160+
; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
161+
; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], [[MUL_I_I2]]
162+
; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP1]])
163+
; DOPCHECK-NEXT: ret float [[HLSL_LENGTH1]]
110164
;
111165
entry:
112-
113166
%mul.i = fmul <3 x float> %p0, %p0
114167
%rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <3 x float> %mul.i)
115168
%hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
116169
ret float %hlsl.length
117170
}
118171

119172
define noundef float @test_length_float4(<4 x float> noundef %p0) {
120-
; CHECK-LABEL: define noundef float @test_length_float4(
121-
; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
122-
; CHECK: [[ENTRY:.*:]]
123-
; CHECK: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
124-
; CHECK: [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
125-
; CHECK: [[TMP1:%.*]] = fadd float 0.000000e+00, [[TMP0]]
126-
; CHECK: [[TMP2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
127-
; CHECK: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
128-
; CHECK: [[TMP4:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
129-
; CHECK: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
130-
; CHECK: [[TMP6:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
131-
; CHECK: [[TMP7:%.*]] = fadd float [[TMP5]], [[TMP6]]
132-
; EXPCHECK: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP7]])
133-
; DOPCHECK: [[HLSL_LENGTH:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP7]])
134-
; CHECK: ret float [[HLSL_LENGTH]]
173+
; EXPCHECK-LABEL: define noundef float @test_length_float4(
174+
; EXPCHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
175+
; EXPCHECK-NEXT: [[ENTRY:.*:]]
176+
; EXPCHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
177+
; EXPCHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
178+
; EXPCHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
179+
; EXPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
180+
; EXPCHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
181+
; EXPCHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
182+
; EXPCHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
183+
; EXPCHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
184+
; EXPCHECK-NEXT: [[HLSL_LENGTH:%.*]] = call float @llvm.sqrt.f32(float [[TMP6]])
185+
; EXPCHECK-NEXT: ret float [[HLSL_LENGTH]]
186+
;
187+
; DOPCHECK-LABEL: define noundef float @test_length_float4(
188+
; DOPCHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
189+
; DOPCHECK-NEXT: [[ENTRY:.*:]]
190+
; DOPCHECK-NEXT: [[P0_I0:%.*]] = extractelement <4 x float> [[P0]], i64 0
191+
; DOPCHECK-NEXT: [[MUL_I_I0:%.*]] = fmul float [[P0_I0]], [[P0_I0]]
192+
; DOPCHECK-NEXT: [[P0_I1:%.*]] = extractelement <4 x float> [[P0]], i64 1
193+
; DOPCHECK-NEXT: [[MUL_I_I1:%.*]] = fmul float [[P0_I1]], [[P0_I1]]
194+
; DOPCHECK-NEXT: [[P0_I2:%.*]] = extractelement <4 x float> [[P0]], i64 2
195+
; DOPCHECK-NEXT: [[MUL_I_I2:%.*]] = fmul float [[P0_I2]], [[P0_I2]]
196+
; DOPCHECK-NEXT: [[P0_I3:%.*]] = extractelement <4 x float> [[P0]], i64 3
197+
; DOPCHECK-NEXT: [[MUL_I_I3:%.*]] = fmul float [[P0_I3]], [[P0_I3]]
198+
; DOPCHECK-NEXT: [[TMP0:%.*]] = fadd float [[MUL_I_I0]], [[MUL_I_I1]]
199+
; DOPCHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], [[MUL_I_I2]]
200+
; DOPCHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], [[MUL_I_I3]]
201+
; DOPCHECK-NEXT: [[HLSL_LENGTH1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[TMP2]])
202+
; DOPCHECK-NEXT: ret float [[HLSL_LENGTH1]]
135203
;
136204
entry:
137-
138205
%mul.i = fmul <4 x float> %p0, %p0
139206
%rdx.fadd.i = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <4 x float> %mul.i)
140207
%hlsl.length = call float @llvm.sqrt.f32(float %rdx.fadd.i)
141208
ret float %hlsl.length
142209
}
210+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
211+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)