@@ -13,11 +13,7 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
13
13
;
14
14
; CHECK-GI-LABEL: add_HalfS:
15
15
; CHECK-GI: // %bb.0:
16
- ; CHECK-GI-NEXT: movi v1.2s, #128, lsl #24
17
- ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
18
- ; CHECK-GI-NEXT: mov s2, v0.s[1]
19
- ; CHECK-GI-NEXT: fadd s0, s1, s0
20
- ; CHECK-GI-NEXT: fadd s0, s0, s2
16
+ ; CHECK-GI-NEXT: faddp s0, v0.2s
21
17
; CHECK-GI-NEXT: ret
22
18
%r = call float @llvm.vector.reduce.fadd.f32.v2f32 (float -0 .0 , <2 x float > %bin.rdx )
23
19
ret float %r
@@ -82,15 +78,12 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
82
78
;
83
79
; CHECK-GI-FP16-LABEL: add_HalfH:
84
80
; CHECK-GI-FP16: // %bb.0:
85
- ; CHECK-GI-FP16-NEXT: adrp x8, .LCPI1_0
86
81
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
87
- ; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
88
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
89
- ; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
90
- ; CHECK-GI-FP16-NEXT: fadd h1, h1, h0
82
+ ; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
83
+ ; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
84
+ ; CHECK-GI-FP16-NEXT: fadd h1, h0, h1
91
85
; CHECK-GI-FP16-NEXT: mov h0, v0.h[3]
92
86
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
93
- ; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
94
87
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
95
88
; CHECK-GI-FP16-NEXT: ret
96
89
%r = call half @llvm.vector.reduce.fadd.f16.v4f16 (half -0 .0 , <4 x half > %bin.rdx )
@@ -202,67 +195,42 @@ define half @add_H(<8 x half> %bin.rdx) {
202
195
;
203
196
; CHECK-GI-FP16-LABEL: add_H:
204
197
; CHECK-GI-FP16: // %bb.0:
205
- ; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
206
- ; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
207
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
208
- ; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
209
- ; CHECK-GI-FP16-NEXT: fadd h1, h1, h0
210
- ; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
211
- ; CHECK-GI-FP16-NEXT: mov h2, v0.h[3]
198
+ ; CHECK-GI-FP16-NEXT: mov h1, v0.h[2]
199
+ ; CHECK-GI-FP16-NEXT: faddp h2, v0.2h
200
+ ; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
201
+ ; CHECK-GI-FP16-NEXT: fadd h1, h2, h1
202
+ ; CHECK-GI-FP16-NEXT: mov h2, v0.h[4]
212
203
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
213
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[4 ]
204
+ ; CHECK-GI-FP16-NEXT: mov h3, v0.h[5 ]
214
205
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
215
- ; CHECK-GI-FP16-NEXT: mov h2, v0.h[5]
216
- ; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
217
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
206
+ ; CHECK-GI-FP16-NEXT: mov h2, v0.h[6]
218
207
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
219
- ; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
220
208
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
209
+ ; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
221
210
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
222
211
; CHECK-GI-FP16-NEXT: ret
223
212
%r = call half @llvm.vector.reduce.fadd.f16.v8f16 (half -0 .0 , <8 x half > %bin.rdx )
224
213
ret half %r
225
214
}
226
215
227
216
define float @add_S (<4 x float > %bin.rdx ) {
228
- ; CHECK-SD-LABEL: add_S:
229
- ; CHECK-SD: // %bb.0:
230
- ; CHECK-SD-NEXT: mov s1, v0.s[2]
231
- ; CHECK-SD-NEXT: faddp s2, v0.2s
232
- ; CHECK-SD-NEXT: mov s0, v0.s[3]
233
- ; CHECK-SD-NEXT: fadd s1, s2, s1
234
- ; CHECK-SD-NEXT: fadd s0, s1, s0
235
- ; CHECK-SD-NEXT: ret
236
- ;
237
- ; CHECK-GI-LABEL: add_S:
238
- ; CHECK-GI: // %bb.0:
239
- ; CHECK-GI-NEXT: movi v1.2s, #128, lsl #24
240
- ; CHECK-GI-NEXT: mov s2, v0.s[1]
241
- ; CHECK-GI-NEXT: mov s3, v0.s[2]
242
- ; CHECK-GI-NEXT: fadd s1, s1, s0
243
- ; CHECK-GI-NEXT: mov s0, v0.s[3]
244
- ; CHECK-GI-NEXT: fadd s1, s1, s2
245
- ; CHECK-GI-NEXT: fadd s1, s1, s3
246
- ; CHECK-GI-NEXT: fadd s0, s1, s0
247
- ; CHECK-GI-NEXT: ret
217
+ ; CHECK-LABEL: add_S:
218
+ ; CHECK: // %bb.0:
219
+ ; CHECK-NEXT: mov s1, v0.s[2]
220
+ ; CHECK-NEXT: faddp s2, v0.2s
221
+ ; CHECK-NEXT: mov s0, v0.s[3]
222
+ ; CHECK-NEXT: fadd s1, s2, s1
223
+ ; CHECK-NEXT: fadd s0, s1, s0
224
+ ; CHECK-NEXT: ret
248
225
%r = call float @llvm.vector.reduce.fadd.f32.v4f32 (float -0 .0 , <4 x float > %bin.rdx )
249
226
ret float %r
250
227
}
251
228
252
229
define double @add_D (<2 x double > %bin.rdx ) {
253
- ; CHECK-SD-LABEL: add_D:
254
- ; CHECK-SD: // %bb.0:
255
- ; CHECK-SD-NEXT: faddp d0, v0.2d
256
- ; CHECK-SD-NEXT: ret
257
- ;
258
- ; CHECK-GI-LABEL: add_D:
259
- ; CHECK-GI: // %bb.0:
260
- ; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
261
- ; CHECK-GI-NEXT: mov d2, v0.d[1]
262
- ; CHECK-GI-NEXT: fmov d1, x8
263
- ; CHECK-GI-NEXT: fadd d0, d1, d0
264
- ; CHECK-GI-NEXT: fadd d0, d0, d2
265
- ; CHECK-GI-NEXT: ret
230
+ ; CHECK-LABEL: add_D:
231
+ ; CHECK: // %bb.0:
232
+ ; CHECK-NEXT: faddp d0, v0.2d
233
+ ; CHECK-NEXT: ret
266
234
%r = call double @llvm.vector.reduce.fadd.f64.v2f64 (double -0 .0 , <2 x double > %bin.rdx )
267
235
ret double %r
268
236
}
@@ -464,23 +432,19 @@ define half @add_2H(<16 x half> %bin.rdx) {
464
432
;
465
433
; CHECK-GI-FP16-LABEL: add_2H:
466
434
; CHECK-GI-FP16: // %bb.0:
467
- ; CHECK-GI-FP16-NEXT: adrp x8, .LCPI5_0
468
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[1]
469
- ; CHECK-GI-FP16-NEXT: mov h4, v0.h[2]
470
- ; CHECK-GI-FP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
471
- ; CHECK-GI-FP16-NEXT: fadd h2, h2, h0
472
- ; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
473
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
435
+ ; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
436
+ ; CHECK-GI-FP16-NEXT: faddp h3, v0.2h
437
+ ; CHECK-GI-FP16-NEXT: mov h4, v0.h[3]
438
+ ; CHECK-GI-FP16-NEXT: fadd h2, h3, h2
439
+ ; CHECK-GI-FP16-NEXT: mov h3, v0.h[4]
474
440
; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
475
- ; CHECK-GI-FP16-NEXT: mov h4, v0.h[4 ]
441
+ ; CHECK-GI-FP16-NEXT: mov h4, v0.h[5 ]
476
442
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
477
- ; CHECK-GI-FP16-NEXT: mov h3, v0.h[5]
478
- ; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
479
- ; CHECK-GI-FP16-NEXT: mov h4, v0.h[6]
443
+ ; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
480
444
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
445
+ ; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
481
446
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
482
447
; CHECK-GI-FP16-NEXT: mov h3, v1.h[2]
483
- ; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
484
448
; CHECK-GI-FP16-NEXT: fadd h0, h2, h0
485
449
; CHECK-GI-FP16-NEXT: mov h2, v1.h[1]
486
450
; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
@@ -502,95 +466,51 @@ define half @add_2H(<16 x half> %bin.rdx) {
502
466
}
503
467
504
468
define float @add_2S (<8 x float > %bin.rdx ) {
505
- ; CHECK-SD-LABEL: add_2S:
506
- ; CHECK-SD: // %bb.0:
507
- ; CHECK-SD-NEXT: mov s2, v0.s[2]
508
- ; CHECK-SD-NEXT: faddp s3, v0.2s
509
- ; CHECK-SD-NEXT: mov s0, v0.s[3]
510
- ; CHECK-SD-NEXT: fadd s2, s3, s2
511
- ; CHECK-SD-NEXT: mov s3, v1.s[2]
512
- ; CHECK-SD-NEXT: fadd s0, s2, s0
513
- ; CHECK-SD-NEXT: mov s2, v1.s[1]
514
- ; CHECK-SD-NEXT: fadd s0, s0, s1
515
- ; CHECK-SD-NEXT: mov s1, v1.s[3]
516
- ; CHECK-SD-NEXT: fadd s0, s0, s2
517
- ; CHECK-SD-NEXT: fadd s0, s0, s3
518
- ; CHECK-SD-NEXT: fadd s0, s0, s1
519
- ; CHECK-SD-NEXT: ret
520
- ;
521
- ; CHECK-GI-LABEL: add_2S:
522
- ; CHECK-GI: // %bb.0:
523
- ; CHECK-GI-NEXT: movi v2.2s, #128, lsl #24
524
- ; CHECK-GI-NEXT: mov s3, v0.s[1]
525
- ; CHECK-GI-NEXT: mov s4, v0.s[2]
526
- ; CHECK-GI-NEXT: fadd s2, s2, s0
527
- ; CHECK-GI-NEXT: mov s0, v0.s[3]
528
- ; CHECK-GI-NEXT: fadd s2, s2, s3
529
- ; CHECK-GI-NEXT: mov s3, v1.s[2]
530
- ; CHECK-GI-NEXT: fadd s2, s2, s4
531
- ; CHECK-GI-NEXT: fadd s0, s2, s0
532
- ; CHECK-GI-NEXT: mov s2, v1.s[1]
533
- ; CHECK-GI-NEXT: fadd s0, s0, s1
534
- ; CHECK-GI-NEXT: mov s1, v1.s[3]
535
- ; CHECK-GI-NEXT: fadd s0, s0, s2
536
- ; CHECK-GI-NEXT: fadd s0, s0, s3
537
- ; CHECK-GI-NEXT: fadd s0, s0, s1
538
- ; CHECK-GI-NEXT: ret
469
+ ; CHECK-LABEL: add_2S:
470
+ ; CHECK: // %bb.0:
471
+ ; CHECK-NEXT: mov s2, v0.s[2]
472
+ ; CHECK-NEXT: faddp s3, v0.2s
473
+ ; CHECK-NEXT: mov s0, v0.s[3]
474
+ ; CHECK-NEXT: fadd s2, s3, s2
475
+ ; CHECK-NEXT: mov s3, v1.s[2]
476
+ ; CHECK-NEXT: fadd s0, s2, s0
477
+ ; CHECK-NEXT: mov s2, v1.s[1]
478
+ ; CHECK-NEXT: fadd s0, s0, s1
479
+ ; CHECK-NEXT: mov s1, v1.s[3]
480
+ ; CHECK-NEXT: fadd s0, s0, s2
481
+ ; CHECK-NEXT: fadd s0, s0, s3
482
+ ; CHECK-NEXT: fadd s0, s0, s1
483
+ ; CHECK-NEXT: ret
539
484
%r = call float @llvm.vector.reduce.fadd.f32.v8f32 (float -0 .0 , <8 x float > %bin.rdx )
540
485
ret float %r
541
486
}
542
487
543
488
define double @add_2D (<4 x double > %bin.rdx ) {
544
- ; CHECK-SD-LABEL: add_2D:
545
- ; CHECK-SD: // %bb.0:
546
- ; CHECK-SD-NEXT: faddp d0, v0.2d
547
- ; CHECK-SD-NEXT: mov d2, v1.d[1]
548
- ; CHECK-SD-NEXT: fadd d0, d0, d1
549
- ; CHECK-SD-NEXT: fadd d0, d0, d2
550
- ; CHECK-SD-NEXT: ret
551
- ;
552
- ; CHECK-GI-LABEL: add_2D:
553
- ; CHECK-GI: // %bb.0:
554
- ; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
555
- ; CHECK-GI-NEXT: mov d3, v0.d[1]
556
- ; CHECK-GI-NEXT: fmov d2, x8
557
- ; CHECK-GI-NEXT: fadd d0, d2, d0
558
- ; CHECK-GI-NEXT: mov d2, v1.d[1]
559
- ; CHECK-GI-NEXT: fadd d0, d0, d3
560
- ; CHECK-GI-NEXT: fadd d0, d0, d1
561
- ; CHECK-GI-NEXT: fadd d0, d0, d2
562
- ; CHECK-GI-NEXT: ret
489
+ ; CHECK-LABEL: add_2D:
490
+ ; CHECK: // %bb.0:
491
+ ; CHECK-NEXT: faddp d0, v0.2d
492
+ ; CHECK-NEXT: mov d2, v1.d[1]
493
+ ; CHECK-NEXT: fadd d0, d0, d1
494
+ ; CHECK-NEXT: fadd d0, d0, d2
495
+ ; CHECK-NEXT: ret
563
496
%r = call double @llvm.vector.reduce.fadd.f64.v4f64 (double -0 .0 , <4 x double > %bin.rdx )
564
497
ret double %r
565
498
}
566
499
567
500
; Added at least one test where the start value is not -0.0.
568
501
define float @add_S_init_42 (<4 x float > %bin.rdx ) {
569
- ; CHECK-SD-LABEL: add_S_init_42:
570
- ; CHECK-SD: // %bb.0:
571
- ; CHECK-SD-NEXT: mov w8, #1109917696 // =0x42280000
572
- ; CHECK-SD-NEXT: mov s2, v0.s[1]
573
- ; CHECK-SD-NEXT: mov s3, v0.s[2]
574
- ; CHECK-SD-NEXT: fmov s1, w8
575
- ; CHECK-SD-NEXT: fadd s1, s0, s1
576
- ; CHECK-SD-NEXT: mov s0, v0.s[3]
577
- ; CHECK-SD-NEXT: fadd s1, s1, s2
578
- ; CHECK-SD-NEXT: fadd s1, s1, s3
579
- ; CHECK-SD-NEXT: fadd s0, s1, s0
580
- ; CHECK-SD-NEXT: ret
581
- ;
582
- ; CHECK-GI-LABEL: add_S_init_42:
583
- ; CHECK-GI: // %bb.0:
584
- ; CHECK-GI-NEXT: mov w8, #1109917696 // =0x42280000
585
- ; CHECK-GI-NEXT: mov s2, v0.s[1]
586
- ; CHECK-GI-NEXT: mov s3, v0.s[2]
587
- ; CHECK-GI-NEXT: fmov s1, w8
588
- ; CHECK-GI-NEXT: fadd s1, s1, s0
589
- ; CHECK-GI-NEXT: mov s0, v0.s[3]
590
- ; CHECK-GI-NEXT: fadd s1, s1, s2
591
- ; CHECK-GI-NEXT: fadd s1, s1, s3
592
- ; CHECK-GI-NEXT: fadd s0, s1, s0
593
- ; CHECK-GI-NEXT: ret
502
+ ; CHECK-LABEL: add_S_init_42:
503
+ ; CHECK: // %bb.0:
504
+ ; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
505
+ ; CHECK-NEXT: mov s2, v0.s[1]
506
+ ; CHECK-NEXT: mov s3, v0.s[2]
507
+ ; CHECK-NEXT: fmov s1, w8
508
+ ; CHECK-NEXT: fadd s1, s0, s1
509
+ ; CHECK-NEXT: mov s0, v0.s[3]
510
+ ; CHECK-NEXT: fadd s1, s1, s2
511
+ ; CHECK-NEXT: fadd s1, s1, s3
512
+ ; CHECK-NEXT: fadd s0, s1, s0
513
+ ; CHECK-NEXT: ret
594
514
%r = call float @llvm.vector.reduce.fadd.f32.v4f32 (float 42 .0 , <4 x float > %bin.rdx )
595
515
ret float %r
596
516
}
@@ -604,5 +524,3 @@ declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
604
524
declare float @llvm.vector.reduce.fadd.f32.v8f32 (float , <8 x float >)
605
525
declare double @llvm.vector.reduce.fadd.f64.v2f64 (double , <2 x double >)
606
526
declare double @llvm.vector.reduce.fadd.f64.v4f64 (double , <4 x double >)
607
- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
608
- ; CHECK: {{.*}}
0 commit comments