@@ -13,6 +13,88 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
1313 ret float %r
1414}
1515
+ ; Fast-math fadd reduction over a <2 x half> vector. The start value passed to
+ ; the intrinsic is -0.0 (the additive identity), so the returned half is the
+ ; sum of the two lanes. Each check-prefix group below records the expected
+ ; AArch64 output for one configuration: without FP16 support the lanes are
+ ; widened to single precision, added, and narrowed back; with it the add is
+ ; done directly on half registers.
16+ define half @add_v2HalfH(<2 x half> %bin.rdx) {
17+ ; CHECK-SD-NOFP16-LABEL: add_v2HalfH:
18+ ; CHECK-SD-NOFP16: // %bb.0:
19+ ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
20+ ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
21+ ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
22+ ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
23+ ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
24+ ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
25+ ; CHECK-SD-NOFP16-NEXT: ret
26+ ;
27+ ; CHECK-SD-FP16-LABEL: add_v2HalfH:
28+ ; CHECK-SD-FP16: // %bb.0:
29+ ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
30+ ; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr
31+ ; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
32+ ; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
33+ ; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
34+ ; CHECK-SD-FP16-NEXT: ret
35+ ;
36+ ; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
37+ ; CHECK-GI-NOFP16: // %bb.0:
38+ ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
39+ ; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
40+ ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
41+ ; CHECK-GI-NOFP16-NEXT: ret
42+ ;
43+ ; CHECK-GI-FP16-LABEL: add_v2HalfH:
44+ ; CHECK-GI-FP16: // %bb.0:
45+ ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
46+ ; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
47+ ; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
48+ ; CHECK-GI-FP16-NEXT: ret
49+ %r = call fast half @llvm.vector.reduce.fadd.f16.v2f16(half -0.0, <2 x half> %bin.rdx)
50+ ret half %r
51+ }
52+
+ ; Fast-math fadd reduction over a <3 x half> vector, again seeded with -0.0 so
+ ; the result is the sum of the three lanes. The vector arrives in a 4-lane
+ ; register; apart from the first group, which adds the three lanes one by one
+ ; in single precision, the expected sequences first overwrite lane 3 with a
+ ; padding value (zero register, immediate, or constant-pool load) and then
+ ; reduce all four lanes with pairwise adds.
53+ define half @add_v3HalfH(<3 x half> %bin.rdx) {
54+ ; CHECK-SD-NOFP16-LABEL: add_v3HalfH:
55+ ; CHECK-SD-NOFP16: // %bb.0:
56+ ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
57+ ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
58+ ; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
59+ ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[2]
60+ ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
61+ ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
62+ ; CHECK-SD-NOFP16-NEXT: fadd s1, s2, s1
63+ ; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
64+ ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
65+ ; CHECK-SD-NOFP16-NEXT: ret
66+ ;
67+ ; CHECK-SD-FP16-LABEL: add_v3HalfH:
68+ ; CHECK-SD-FP16: // %bb.0:
69+ ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
70+ ; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
71+ ; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
72+ ; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
73+ ; CHECK-SD-FP16-NEXT: ret
74+ ;
75+ ; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
76+ ; CHECK-GI-NOFP16: // %bb.0:
77+ ; CHECK-GI-NOFP16-NEXT: movi v1.2s, #128, lsl #24
78+ ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
79+ ; CHECK-GI-NOFP16-NEXT: mov v0.s[3], v1.s[0]
80+ ; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
81+ ; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
82+ ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
83+ ; CHECK-GI-NOFP16-NEXT: ret
84+ ;
85+ ; CHECK-GI-FP16-LABEL: add_v3HalfH:
86+ ; CHECK-GI-FP16: // %bb.0:
87+ ; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
88+ ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
89+ ; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
90+ ; CHECK-GI-FP16-NEXT: mov v0.h[3], v1.h[0]
91+ ; CHECK-GI-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
92+ ; CHECK-GI-FP16-NEXT: faddp h0, v0.2h
93+ ; CHECK-GI-FP16-NEXT: ret
94+ %r = call fast half @llvm.vector.reduce.fadd.f16.v3f16(half -0.0, <3 x half> %bin.rdx)
95+ ret half %r
96+ }
97+
1698define half @add_HalfH (<4 x half > %bin.rdx ) {
1799; CHECK-SD-NOFP16-LABEL: add_HalfH:
18100; CHECK-SD-NOFP16: // %bb.0:
@@ -239,15 +321,15 @@ define float @fadd_reduction_v4f32_in_loop(ptr %ptr.start) {
239321; CHECK: // %bb.0: // %entry
240322; CHECK-NEXT: movi d0, #0000000000000000
241323; CHECK-NEXT: mov x8, xzr
242- ; CHECK-NEXT: .LBB9_1 : // %loop
324+ ; CHECK-NEXT: .LBB11_1 : // %loop
243325; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
244326; CHECK-NEXT: ldr q1, [x0, x8]
245327; CHECK-NEXT: add x8, x8, #16
246328; CHECK-NEXT: cmp w8, #112
247329; CHECK-NEXT: faddp v1.4s, v1.4s, v1.4s
248330; CHECK-NEXT: faddp s1, v1.2s
249331; CHECK-NEXT: fadd s0, s1, s0
250- ; CHECK-NEXT: b.ne .LBB9_1
332+ ; CHECK-NEXT: b.ne .LBB11_1
251333; CHECK-NEXT: // %bb.2: // %exit
252334; CHECK-NEXT: ret
253335entry:
@@ -276,7 +358,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
276358; CHECK-SD-NOFP16: // %bb.0: // %entry
277359; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
278360; CHECK-SD-NOFP16-NEXT: mov x8, xzr
279- ; CHECK-SD-NOFP16-NEXT: .LBB10_1 : // %loop
361+ ; CHECK-SD-NOFP16-NEXT: .LBB12_1 : // %loop
280362; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
281363; CHECK-SD-NOFP16-NEXT: ldr d1, [x0, x8]
282364; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
@@ -294,31 +376,31 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
294376; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
295377; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
296378; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
297- ; CHECK-SD-NOFP16-NEXT: b.ne .LBB10_1
379+ ; CHECK-SD-NOFP16-NEXT: b.ne .LBB12_1
298380; CHECK-SD-NOFP16-NEXT: // %bb.2: // %exit
299381; CHECK-SD-NOFP16-NEXT: ret
300382;
301383; CHECK-SD-FP16-LABEL: fadd_reduction_v4f16_in_loop:
302384; CHECK-SD-FP16: // %bb.0: // %entry
303385; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
304386; CHECK-SD-FP16-NEXT: mov x8, xzr
305- ; CHECK-SD-FP16-NEXT: .LBB10_1 : // %loop
387+ ; CHECK-SD-FP16-NEXT: .LBB12_1 : // %loop
306388; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
307389; CHECK-SD-FP16-NEXT: ldr d1, [x0, x8]
308390; CHECK-SD-FP16-NEXT: add x8, x8, #8
309391; CHECK-SD-FP16-NEXT: cmp w8, #56
310392; CHECK-SD-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
311393; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
312394; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
313- ; CHECK-SD-FP16-NEXT: b.ne .LBB10_1
395+ ; CHECK-SD-FP16-NEXT: b.ne .LBB12_1
314396; CHECK-SD-FP16-NEXT: // %bb.2: // %exit
315397; CHECK-SD-FP16-NEXT: ret
316398;
317399; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
318400; CHECK-GI-NOFP16: // %bb.0: // %entry
319401; CHECK-GI-NOFP16-NEXT: mov x8, xzr
320402; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
321- ; CHECK-GI-NOFP16-NEXT: .LBB10_1 : // %loop
403+ ; CHECK-GI-NOFP16-NEXT: .LBB12_1 : // %loop
322404; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
323405; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
324406; CHECK-GI-NOFP16-NEXT: fmov s1, w9
@@ -333,7 +415,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
333415; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
334416; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
335417; CHECK-GI-NOFP16-NEXT: fmov w9, s0
336- ; CHECK-GI-NOFP16-NEXT: b.ne .LBB10_1
418+ ; CHECK-GI-NOFP16-NEXT: b.ne .LBB12_1
337419; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
338420; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
339421; CHECK-GI-NOFP16-NEXT: ret
@@ -342,15 +424,15 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
342424; CHECK-GI-FP16: // %bb.0: // %entry
343425; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
344426; CHECK-GI-FP16-NEXT: mov x8, xzr
345- ; CHECK-GI-FP16-NEXT: .LBB10_1 : // %loop
427+ ; CHECK-GI-FP16-NEXT: .LBB12_1 : // %loop
346428; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
347429; CHECK-GI-FP16-NEXT: ldr d1, [x0, x8]
348430; CHECK-GI-FP16-NEXT: add x8, x8, #8
349431; CHECK-GI-FP16-NEXT: cmp w8, #56
350432; CHECK-GI-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
351433; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
352434; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
353- ; CHECK-GI-FP16-NEXT: b.ne .LBB10_1
435+ ; CHECK-GI-FP16-NEXT: b.ne .LBB12_1
354436; CHECK-GI-FP16-NEXT: // %bb.2: // %exit
355437; CHECK-GI-FP16-NEXT: ret
356438entry:
@@ -379,7 +461,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
379461; CHECK-SD-NOFP16: // %bb.0: // %entry
380462; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
381463; CHECK-SD-NOFP16-NEXT: mov x8, xzr
382- ; CHECK-SD-NOFP16-NEXT: .LBB11_1 : // %loop
464+ ; CHECK-SD-NOFP16-NEXT: .LBB13_1 : // %loop
383465; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
384466; CHECK-SD-NOFP16-NEXT: ldr q1, [x0, x8]
385467; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
@@ -409,15 +491,15 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
409491; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
410492; CHECK-SD-NOFP16-NEXT: fadd s0, s2, s0
411493; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
412- ; CHECK-SD-NOFP16-NEXT: b.ne .LBB11_1
494+ ; CHECK-SD-NOFP16-NEXT: b.ne .LBB13_1
413495; CHECK-SD-NOFP16-NEXT: // %bb.2: // %exit
414496; CHECK-SD-NOFP16-NEXT: ret
415497;
416498; CHECK-SD-FP16-LABEL: fadd_reduction_v8f16_in_loop:
417499; CHECK-SD-FP16: // %bb.0: // %entry
418500; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
419501; CHECK-SD-FP16-NEXT: mov x8, xzr
420- ; CHECK-SD-FP16-NEXT: .LBB11_1 : // %loop
502+ ; CHECK-SD-FP16-NEXT: .LBB13_1 : // %loop
421503; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
422504; CHECK-SD-FP16-NEXT: ldr q1, [x0, x8]
423505; CHECK-SD-FP16-NEXT: add x8, x8, #8
@@ -426,15 +508,15 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
426508; CHECK-SD-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
427509; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
428510; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
429- ; CHECK-SD-FP16-NEXT: b.ne .LBB11_1
511+ ; CHECK-SD-FP16-NEXT: b.ne .LBB13_1
430512; CHECK-SD-FP16-NEXT: // %bb.2: // %exit
431513; CHECK-SD-FP16-NEXT: ret
432514;
433515; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
434516; CHECK-GI-NOFP16: // %bb.0: // %entry
435517; CHECK-GI-NOFP16-NEXT: mov x8, xzr
436518; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
437- ; CHECK-GI-NOFP16-NEXT: .LBB11_1 : // %loop
519+ ; CHECK-GI-NOFP16-NEXT: .LBB13_1 : // %loop
438520; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
439521; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
440522; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
@@ -451,7 +533,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
451533; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
452534; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
453535; CHECK-GI-NOFP16-NEXT: fmov w9, s0
454- ; CHECK-GI-NOFP16-NEXT: b.ne .LBB11_1
536+ ; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
455537; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
456538; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
457539; CHECK-GI-NOFP16-NEXT: ret
@@ -460,7 +542,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
460542; CHECK-GI-FP16: // %bb.0: // %entry
461543; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
462544; CHECK-GI-FP16-NEXT: mov x8, xzr
463- ; CHECK-GI-FP16-NEXT: .LBB11_1 : // %loop
545+ ; CHECK-GI-FP16-NEXT: .LBB13_1 : // %loop
464546; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
465547; CHECK-GI-FP16-NEXT: ldr q1, [x0, x8]
466548; CHECK-GI-FP16-NEXT: add x8, x8, #8
@@ -469,7 +551,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
469551; CHECK-GI-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
470552; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
471553; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
472- ; CHECK-GI-FP16-NEXT: b.ne .LBB11_1
554+ ; CHECK-GI-FP16-NEXT: b.ne .LBB13_1
473555; CHECK-GI-FP16-NEXT: // %bb.2: // %exit
474556; CHECK-GI-FP16-NEXT: ret
475557entry:
0 commit comments