@@ -12,6 +12,7 @@ define half @reduce_fast_half2(<2 x half> %vec2) {
1212; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
1313; CHECK-NEXT: [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
1414; CHECK-NEXT: ret half [[ADD1]]
15+ ;
1516entry:
1617 %elt0 = extractelement <2 x half > %vec2 , i64 0
1718 %elt1 = extractelement <2 x half > %vec2 , i64 1
@@ -27,6 +28,7 @@ define half @reduce_half2(<2 x half> %vec2) {
2728; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
2829; CHECK-NEXT: [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
2930; CHECK-NEXT: ret half [[ADD1]]
31+ ;
3032entry:
3133 %elt0 = extractelement <2 x half > %vec2 , i64 0
3234 %elt1 = extractelement <2 x half > %vec2 , i64 1
@@ -38,8 +40,9 @@ define half @reduce_fast_half4(<4 x half> %vec4) {
3840; CHECK-LABEL: define half @reduce_fast_half4(
3941; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
4042; CHECK-NEXT: [[ENTRY:.*:]]
41- ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[VEC4]])
43+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[VEC4]])
4244; CHECK-NEXT: ret half [[TMP0]]
45+ ;
4346entry:
4447 %elt0 = extractelement <4 x half > %vec4 , i64 0
4548 %elt1 = extractelement <4 x half > %vec4 , i64 1
@@ -63,6 +66,7 @@ define half @reduce_half4(<4 x half> %vec4) {
6366; CHECK-NEXT: [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
6467; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
6568; CHECK-NEXT: ret half [[ADD3]]
69+ ;
6670entry:
6771 %elt0 = extractelement <4 x half > %vec4 , i64 0
6872 %elt1 = extractelement <4 x half > %vec4 , i64 1
@@ -83,12 +87,13 @@ define half @reduce_fast_half8(<8 x half> %vec8) {
8387; CHECK-NEXT: [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
8488; CHECK-NEXT: [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
8589; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
86- ; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[TMP0]])
90+ ; CHECK-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[TMP0]])
8791; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[ELT4]]
8892; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
8993; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[OP_RDX]], [[OP_RDX1]]
9094; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast half [[OP_RDX2]], [[ELT7]]
9195; CHECK-NEXT: ret half [[OP_RDX3]]
96+ ;
9297entry:
9398 %elt0 = extractelement <8 x half > %vec8 , i64 0
9499 %elt1 = extractelement <8 x half > %vec8 , i64 1
@@ -128,6 +133,7 @@ define half @reduce_half8(<8 x half> %vec8) {
128133; CHECK-NEXT: [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
129134; CHECK-NEXT: [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
130135; CHECK-NEXT: ret half [[ADD7]]
136+ ;
131137entry:
132138 %elt0 = extractelement <8 x half > %vec8 , i64 0
133139 %elt1 = extractelement <8 x half > %vec8 , i64 1
@@ -151,8 +157,9 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
151157; NOFP16-LABEL: define half @reduce_fast_half16(
152158; NOFP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
153159; NOFP16-NEXT: [[ENTRY:.*:]]
154- ; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH8000 , <16 x half> [[VEC16]])
160+ ; NOFP16-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000 , <16 x half> [[VEC16]])
155161; NOFP16-NEXT: ret half [[TMP0]]
162+ ;
156163; FP16-LABEL: define half @reduce_fast_half16(
157164; FP16-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
158165; FP16-NEXT: [[ENTRY:.*:]]
@@ -165,9 +172,9 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
165172; FP16-NEXT: [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
166173; FP16-NEXT: [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
167174; FP16-NEXT: [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168- ; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[TMP0]])
175+ ; FP16-NEXT: [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[TMP0]])
169176; FP16-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
170- ; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[TMP2]])
177+ ; FP16-NEXT: [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[TMP2]])
171178; FP16-NEXT: [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
172179; FP16-NEXT: [[OP_RDX1:%.*]] = fadd fast half [[OP_RDX]], [[ELT4]]
173180; FP16-NEXT: [[OP_RDX2:%.*]] = fadd fast half [[ELT5]], [[ELT6]]
@@ -178,6 +185,7 @@ define half @reduce_fast_half16(<16 x half> %vec16) {
178185; FP16-NEXT: [[OP_RDX7:%.*]] = fadd fast half [[OP_RDX5]], [[OP_RDX6]]
179186; FP16-NEXT: [[OP_RDX8:%.*]] = fadd fast half [[OP_RDX7]], [[ELT15]]
180187; FP16-NEXT: ret half [[OP_RDX8]]
188+ ;
181189entry:
182190 %elt0 = extractelement <16 x half > %vec16 , i64 0
183191 %elt1 = extractelement <16 x half > %vec16 , i64 1
@@ -249,6 +257,7 @@ define half @reduce_half16(<16 x half> %vec16) {
249257; CHECK-NEXT: [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
250258; CHECK-NEXT: [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
251259; CHECK-NEXT: ret half [[ADD15]]
260+ ;
252261entry:
253262 %elt0 = extractelement <16 x half > %vec16 , i64 0
254263 %elt1 = extractelement <16 x half > %vec16 , i64 1
@@ -292,6 +301,7 @@ define float @reduce_fast_float2(<2 x float> %vec2) {
292301; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
293302; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
294303; CHECK-NEXT: ret float [[ADD1]]
304+ ;
295305entry:
296306 %elt0 = extractelement <2 x float > %vec2 , i64 0
297307 %elt1 = extractelement <2 x float > %vec2 , i64 1
@@ -307,6 +317,7 @@ define float @reduce_float2(<2 x float> %vec2) {
307317; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
308318; CHECK-NEXT: [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
309319; CHECK-NEXT: ret float [[ADD1]]
320+ ;
310321entry:
311322 %elt0 = extractelement <2 x float > %vec2 , i64 0
312323 %elt1 = extractelement <2 x float > %vec2 , i64 1
@@ -318,8 +329,9 @@ define float @reduce_fast_float4(<4 x float> %vec4) {
318329; CHECK-LABEL: define float @reduce_fast_float4(
319330; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
320331; CHECK-NEXT: [[ENTRY:.*:]]
321- ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float - 0.000000e+00, <4 x float> [[VEC4]])
332+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
322333; CHECK-NEXT: ret float [[TMP0]]
334+ ;
323335entry:
324336 %elt0 = extractelement <4 x float > %vec4 , i64 0
325337 %elt1 = extractelement <4 x float > %vec4 , i64 1
@@ -343,6 +355,7 @@ define float @reduce_float4(<4 x float> %vec4) {
343355; CHECK-NEXT: [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
344356; CHECK-NEXT: [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
345357; CHECK-NEXT: ret float [[ADD3]]
358+ ;
346359entry:
347360 %elt0 = extractelement <4 x float > %vec4 , i64 0
348361 %elt1 = extractelement <4 x float > %vec4 , i64 1
@@ -358,8 +371,9 @@ define float @reduce_fast_float8(<8 x float> %vec8) {
358371; CHECK-LABEL: define float @reduce_fast_float8(
359372; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
360373; CHECK-NEXT: [[ENTRY:.*:]]
361- ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float - 0.000000e+00, <8 x float> [[VEC8]])
374+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
362375; CHECK-NEXT: ret float [[TMP0]]
376+ ;
363377entry:
364378 %elt0 = extractelement <8 x float > %vec8 , i64 0
365379 %elt1 = extractelement <8 x float > %vec8 , i64 1
@@ -399,6 +413,7 @@ define float @reduce_float8(<8 x float> %vec8) {
399413; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
400414; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
401415; CHECK-NEXT: ret float [[ADD7]]
416+ ;
402417entry:
403418 %elt0 = extractelement <8 x float > %vec8 , i64 0
404419 %elt1 = extractelement <8 x float > %vec8 , i64 1
@@ -426,6 +441,7 @@ define double @reduce_fast_double2(<2 x double> %vec2) {
426441; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
427442; CHECK-NEXT: [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
428443; CHECK-NEXT: ret double [[ADD1]]
444+ ;
429445entry:
430446 %elt0 = extractelement <2 x double > %vec2 , i64 0
431447 %elt1 = extractelement <2 x double > %vec2 , i64 1
@@ -441,6 +457,7 @@ define double @reduce_double2(<2 x double> %vec2) {
441457; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
442458; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
443459; CHECK-NEXT: ret double [[ADD1]]
460+ ;
444461entry:
445462 %elt0 = extractelement <2 x double > %vec2 , i64 0
446463 %elt1 = extractelement <2 x double > %vec2 , i64 1
@@ -452,8 +469,9 @@ define double @reduce_fast_double4(<4 x double> %vec4) {
452469; CHECK-LABEL: define double @reduce_fast_double4(
453470; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
454471; CHECK-NEXT: [[ENTRY:.*:]]
455- ; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double - 0.000000e+00, <4 x double> [[VEC4]])
472+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
456473; CHECK-NEXT: ret double [[TMP0]]
474+ ;
457475entry:
458476 %elt0 = extractelement <4 x double > %vec4 , i64 0
459477 %elt1 = extractelement <4 x double > %vec4 , i64 1
@@ -477,6 +495,7 @@ define double @reduce_double4(<4 x double> %vec4) {
477495; CHECK-NEXT: [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
478496; CHECK-NEXT: [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
479497; CHECK-NEXT: ret double [[ADD3]]
498+ ;
480499entry:
481500 %elt0 = extractelement <4 x double > %vec4 , i64 0
482501 %elt1 = extractelement <4 x double > %vec4 , i64 1
@@ -507,6 +526,7 @@ define float @reduce_fast_float_case1(ptr %a) {
507526; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
508527; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float [[LOAD4]], [[ADD3]]
509528; CHECK-NEXT: ret float [[ADD4]]
529+ ;
510530entry:
511531 %load = load float , ptr %a
512532 %gep = getelementptr inbounds i8 , ptr %a , i64 4
@@ -543,6 +563,7 @@ define float @reduce_float_case1(ptr %a) {
543563; CHECK-NEXT: [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
544564; CHECK-NEXT: [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
545565; CHECK-NEXT: ret float [[ADD4]]
566+ ;
546567entry:
547568 %load = load float , ptr %a
548569 %gep = getelementptr inbounds i8 , ptr %a , i64 4
@@ -584,6 +605,7 @@ define float @reduce_fast_float_case2(ptr %a, ptr %b) {
584605; CHECK-NEXT: [[RED2:%.*]] = fadd fast float [[ADD2]], [[RED1]]
585606; CHECK-NEXT: [[RED3:%.*]] = fadd fast float [[ADD3]], [[RED2]]
586607; CHECK-NEXT: ret float [[RED3]]
608+ ;
587609entry:
588610 %gepa1 = getelementptr inbounds float , ptr %a , i32 1
589611 %gepa2 = getelementptr inbounds float , ptr %a , i32 2
@@ -633,6 +655,7 @@ define float @reduce_float_case2(ptr %a, ptr %b) {
633655; CHECK-NEXT: [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
634656; CHECK-NEXT: [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
635657; CHECK-NEXT: ret float [[RED3]]
658+ ;
636659entry:
637660 %gepa1 = getelementptr inbounds float , ptr %a , i32 1
638661 %gepa2 = getelementptr inbounds float , ptr %a , i32 2
@@ -694,6 +717,7 @@ define float @reduce_fast_float_case3(ptr %a) {
694717; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
695718; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
696719; CHECK-NEXT: ret float [[ADD7]]
720+ ;
697721entry:
698722 %gep1 = getelementptr inbounds float , ptr %a , i32 1
699723 %gep2 = getelementptr inbounds float , ptr %a , i32 2
@@ -764,6 +788,7 @@ define float @reduce_float_case3(ptr %a) {
764788; CHECK-NEXT: [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
765789; CHECK-NEXT: [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
766790; CHECK-NEXT: ret float [[ADD7]]
791+ ;
767792entry:
768793 %gep1 = getelementptr inbounds float , ptr %a , i32 1
769794 %gep2 = getelementptr inbounds float , ptr %a , i32 2
@@ -802,8 +827,9 @@ define half @reduce_unordered_fast_half4(<4 x half> %vec4) {
802827; CHECK-LABEL: define half @reduce_unordered_fast_half4(
803828; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
804829; CHECK-NEXT: [[ENTRY:.*:]]
805- ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH8000 , <4 x half> [[VEC4]])
830+ ; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000 , <4 x half> [[VEC4]])
806831; CHECK-NEXT: ret half [[TMP0]]
832+ ;
807833entry:
808834 %elt0 = extractelement <4 x half > %vec4 , i64 0
809835 %elt1 = extractelement <4 x half > %vec4 , i64 1
@@ -826,6 +852,7 @@ define half @reduce_unordered_half4(<4 x half> %vec4) {
826852; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1
827853; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]]
828854; CHECK-NEXT: ret half [[ADD3]]
855+ ;
829856entry:
830857 %elt0 = extractelement <4 x half > %vec4 , i64 0
831858 %elt1 = extractelement <4 x half > %vec4 , i64 1
0 commit comments