Skip to content

Commit c3dfbb6

Browse files
authored
[AArch64][GlobalISel] Add commute_constant_to_rhs to post legalizer combiners (llvm#81103)
This helps the FP reductions by moving constant operands to the RHS, which in turn helps simplify away `fadd -0.0` and `fmul 1.0`.
1 parent 5932fcc commit c3dfbb6

File tree

4 files changed

+105
-236
lines changed

4 files changed

+105
-236
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -288,5 +288,6 @@ def AArch64PostLegalizerCombiner
288288
constant_fold_binops, identity_combines,
289289
ptr_add_immed_chain, overlapping_and,
290290
split_store_zero_128, undef_combines,
291-
select_to_minmax, or_to_bsp]> {
291+
select_to_minmax, or_to_bsp,
292+
commute_constant_to_rhs]> {
292293
}

llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2146,8 +2146,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
21462146
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
21472147
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
21482148
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
2149-
; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
2150-
; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
2149+
; CHECK-OUTLINE-O1-NEXT: mvn w0, w1
21512150
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
21522151
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr1_rel
21532152
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
@@ -3202,8 +3201,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
32023201
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
32033202
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
32043203
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
3205-
; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
3206-
; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
3204+
; CHECK-OUTLINE-O1-NEXT: mvn w0, w1
32073205
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
32083206
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr2_rel
32093207
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
@@ -4255,8 +4253,7 @@ define i32 @atomicrmw_and_i32(ptr %ptr, i32 %rhs) {
42554253
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
42564254
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
42574255
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
4258-
; CHECK-OUTLINE-O1-NEXT: mov w8, #-1 ; =0xffffffff
4259-
; CHECK-OUTLINE-O1-NEXT: eor w0, w8, w1
4256+
; CHECK-OUTLINE-O1-NEXT: mvn w0, w1
42604257
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
42614258
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr4_rel
42624259
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
@@ -5276,8 +5273,7 @@ define i64 @atomicrmw_and_i64(ptr %ptr, i64 %rhs) {
52765273
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w30, -8
52775274
; CHECK-OUTLINE-O1-NEXT: .cfi_offset w29, -16
52785275
; CHECK-OUTLINE-O1-NEXT: mov x2, x0
5279-
; CHECK-OUTLINE-O1-NEXT: mov x8, #-1 ; =0xffffffffffffffff
5280-
; CHECK-OUTLINE-O1-NEXT: eor x0, x8, x1
5276+
; CHECK-OUTLINE-O1-NEXT: mvn x0, x1
52815277
; CHECK-OUTLINE-O1-NEXT: mov x1, x2
52825278
; CHECK-OUTLINE-O1-NEXT: bl ___aarch64_ldclr8_rel
52835279
; CHECK-OUTLINE-O1-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload

llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll

Lines changed: 66 additions & 148 deletions
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,7 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
1313
;
1414
; CHECK-GI-LABEL: add_HalfS:
1515
; CHECK-GI: // %bb.0:
16-
; CHECK-GI-NEXT: movi v1.2s, #128, lsl #24
17-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
18-
; CHECK-GI-NEXT: mov s2, v0.s[1]
19-
; CHECK-GI-NEXT: fadd s0, s1, s0
20-
; CHECK-GI-NEXT: fadd s0, s0, s2
16+
; CHECK-GI-NEXT: faddp s0, v0.2s
2117
; CHECK-GI-NEXT: ret
2218
%r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
2319
ret float %r
@@ -82,15 +78,12 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
8278
;
8379
; CHECK-GI-FP16-LABEL: add_HalfH:
8480
; CHECK-GI-FP16: // %bb.0:
85-
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI1_0
8681
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
87-
; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
88-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
89-
; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
90-
; CHECK-GI-FP16-NEXT: fadd h1, h1, h0
82+
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
83+
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
84+
; CHECK-GI-FP16-NEXT: fadd h1, h0, h1
9185
; CHECK-GI-FP16-NEXT: mov h0, v0.h[3]
9286
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
93-
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
9487
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
9588
; CHECK-GI-FP16-NEXT: ret
9689
%r = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %bin.rdx)
@@ -202,67 +195,42 @@ define half @add_H(<8 x half> %bin.rdx) {
202195
;
203196
; CHECK-GI-FP16-LABEL: add_H:
204197
; CHECK-GI-FP16: // %bb.0:
205-
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
206-
; CHECK-GI-FP16-NEXT: mov h2, v0.h[1]
207-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[2]
208-
; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
209-
; CHECK-GI-FP16-NEXT: fadd h1, h1, h0
210-
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
211-
; CHECK-GI-FP16-NEXT: mov h2, v0.h[3]
198+
; CHECK-GI-FP16-NEXT: mov h1, v0.h[2]
199+
; CHECK-GI-FP16-NEXT: faddp h2, v0.2h
200+
; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
201+
; CHECK-GI-FP16-NEXT: fadd h1, h2, h1
202+
; CHECK-GI-FP16-NEXT: mov h2, v0.h[4]
212203
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
213-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[4]
204+
; CHECK-GI-FP16-NEXT: mov h3, v0.h[5]
214205
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
215-
; CHECK-GI-FP16-NEXT: mov h2, v0.h[5]
216-
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
217-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
206+
; CHECK-GI-FP16-NEXT: mov h2, v0.h[6]
218207
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
219-
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
220208
; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
209+
; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
221210
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
222211
; CHECK-GI-FP16-NEXT: ret
223212
%r = call half @llvm.vector.reduce.fadd.f16.v8f16(half -0.0, <8 x half> %bin.rdx)
224213
ret half %r
225214
}
226215

227216
define float @add_S(<4 x float> %bin.rdx) {
228-
; CHECK-SD-LABEL: add_S:
229-
; CHECK-SD: // %bb.0:
230-
; CHECK-SD-NEXT: mov s1, v0.s[2]
231-
; CHECK-SD-NEXT: faddp s2, v0.2s
232-
; CHECK-SD-NEXT: mov s0, v0.s[3]
233-
; CHECK-SD-NEXT: fadd s1, s2, s1
234-
; CHECK-SD-NEXT: fadd s0, s1, s0
235-
; CHECK-SD-NEXT: ret
236-
;
237-
; CHECK-GI-LABEL: add_S:
238-
; CHECK-GI: // %bb.0:
239-
; CHECK-GI-NEXT: movi v1.2s, #128, lsl #24
240-
; CHECK-GI-NEXT: mov s2, v0.s[1]
241-
; CHECK-GI-NEXT: mov s3, v0.s[2]
242-
; CHECK-GI-NEXT: fadd s1, s1, s0
243-
; CHECK-GI-NEXT: mov s0, v0.s[3]
244-
; CHECK-GI-NEXT: fadd s1, s1, s2
245-
; CHECK-GI-NEXT: fadd s1, s1, s3
246-
; CHECK-GI-NEXT: fadd s0, s1, s0
247-
; CHECK-GI-NEXT: ret
217+
; CHECK-LABEL: add_S:
218+
; CHECK: // %bb.0:
219+
; CHECK-NEXT: mov s1, v0.s[2]
220+
; CHECK-NEXT: faddp s2, v0.2s
221+
; CHECK-NEXT: mov s0, v0.s[3]
222+
; CHECK-NEXT: fadd s1, s2, s1
223+
; CHECK-NEXT: fadd s0, s1, s0
224+
; CHECK-NEXT: ret
248225
%r = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %bin.rdx)
249226
ret float %r
250227
}
251228

252229
define double @add_D(<2 x double> %bin.rdx) {
253-
; CHECK-SD-LABEL: add_D:
254-
; CHECK-SD: // %bb.0:
255-
; CHECK-SD-NEXT: faddp d0, v0.2d
256-
; CHECK-SD-NEXT: ret
257-
;
258-
; CHECK-GI-LABEL: add_D:
259-
; CHECK-GI: // %bb.0:
260-
; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
261-
; CHECK-GI-NEXT: mov d2, v0.d[1]
262-
; CHECK-GI-NEXT: fmov d1, x8
263-
; CHECK-GI-NEXT: fadd d0, d1, d0
264-
; CHECK-GI-NEXT: fadd d0, d0, d2
265-
; CHECK-GI-NEXT: ret
230+
; CHECK-LABEL: add_D:
231+
; CHECK: // %bb.0:
232+
; CHECK-NEXT: faddp d0, v0.2d
233+
; CHECK-NEXT: ret
266234
%r = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %bin.rdx)
267235
ret double %r
268236
}
@@ -464,23 +432,19 @@ define half @add_2H(<16 x half> %bin.rdx) {
464432
;
465433
; CHECK-GI-FP16-LABEL: add_2H:
466434
; CHECK-GI-FP16: // %bb.0:
467-
; CHECK-GI-FP16-NEXT: adrp x8, .LCPI5_0
468-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[1]
469-
; CHECK-GI-FP16-NEXT: mov h4, v0.h[2]
470-
; CHECK-GI-FP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
471-
; CHECK-GI-FP16-NEXT: fadd h2, h2, h0
472-
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
473-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
435+
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
436+
; CHECK-GI-FP16-NEXT: faddp h3, v0.2h
437+
; CHECK-GI-FP16-NEXT: mov h4, v0.h[3]
438+
; CHECK-GI-FP16-NEXT: fadd h2, h3, h2
439+
; CHECK-GI-FP16-NEXT: mov h3, v0.h[4]
474440
; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
475-
; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
441+
; CHECK-GI-FP16-NEXT: mov h4, v0.h[5]
476442
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
477-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[5]
478-
; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
479-
; CHECK-GI-FP16-NEXT: mov h4, v0.h[6]
443+
; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
480444
; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
445+
; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
481446
; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
482447
; CHECK-GI-FP16-NEXT: mov h3, v1.h[2]
483-
; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
484448
; CHECK-GI-FP16-NEXT: fadd h0, h2, h0
485449
; CHECK-GI-FP16-NEXT: mov h2, v1.h[1]
486450
; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
@@ -502,95 +466,51 @@ define half @add_2H(<16 x half> %bin.rdx) {
502466
}
503467

504468
define float @add_2S(<8 x float> %bin.rdx) {
505-
; CHECK-SD-LABEL: add_2S:
506-
; CHECK-SD: // %bb.0:
507-
; CHECK-SD-NEXT: mov s2, v0.s[2]
508-
; CHECK-SD-NEXT: faddp s3, v0.2s
509-
; CHECK-SD-NEXT: mov s0, v0.s[3]
510-
; CHECK-SD-NEXT: fadd s2, s3, s2
511-
; CHECK-SD-NEXT: mov s3, v1.s[2]
512-
; CHECK-SD-NEXT: fadd s0, s2, s0
513-
; CHECK-SD-NEXT: mov s2, v1.s[1]
514-
; CHECK-SD-NEXT: fadd s0, s0, s1
515-
; CHECK-SD-NEXT: mov s1, v1.s[3]
516-
; CHECK-SD-NEXT: fadd s0, s0, s2
517-
; CHECK-SD-NEXT: fadd s0, s0, s3
518-
; CHECK-SD-NEXT: fadd s0, s0, s1
519-
; CHECK-SD-NEXT: ret
520-
;
521-
; CHECK-GI-LABEL: add_2S:
522-
; CHECK-GI: // %bb.0:
523-
; CHECK-GI-NEXT: movi v2.2s, #128, lsl #24
524-
; CHECK-GI-NEXT: mov s3, v0.s[1]
525-
; CHECK-GI-NEXT: mov s4, v0.s[2]
526-
; CHECK-GI-NEXT: fadd s2, s2, s0
527-
; CHECK-GI-NEXT: mov s0, v0.s[3]
528-
; CHECK-GI-NEXT: fadd s2, s2, s3
529-
; CHECK-GI-NEXT: mov s3, v1.s[2]
530-
; CHECK-GI-NEXT: fadd s2, s2, s4
531-
; CHECK-GI-NEXT: fadd s0, s2, s0
532-
; CHECK-GI-NEXT: mov s2, v1.s[1]
533-
; CHECK-GI-NEXT: fadd s0, s0, s1
534-
; CHECK-GI-NEXT: mov s1, v1.s[3]
535-
; CHECK-GI-NEXT: fadd s0, s0, s2
536-
; CHECK-GI-NEXT: fadd s0, s0, s3
537-
; CHECK-GI-NEXT: fadd s0, s0, s1
538-
; CHECK-GI-NEXT: ret
469+
; CHECK-LABEL: add_2S:
470+
; CHECK: // %bb.0:
471+
; CHECK-NEXT: mov s2, v0.s[2]
472+
; CHECK-NEXT: faddp s3, v0.2s
473+
; CHECK-NEXT: mov s0, v0.s[3]
474+
; CHECK-NEXT: fadd s2, s3, s2
475+
; CHECK-NEXT: mov s3, v1.s[2]
476+
; CHECK-NEXT: fadd s0, s2, s0
477+
; CHECK-NEXT: mov s2, v1.s[1]
478+
; CHECK-NEXT: fadd s0, s0, s1
479+
; CHECK-NEXT: mov s1, v1.s[3]
480+
; CHECK-NEXT: fadd s0, s0, s2
481+
; CHECK-NEXT: fadd s0, s0, s3
482+
; CHECK-NEXT: fadd s0, s0, s1
483+
; CHECK-NEXT: ret
539484
%r = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %bin.rdx)
540485
ret float %r
541486
}
542487

543488
define double @add_2D(<4 x double> %bin.rdx) {
544-
; CHECK-SD-LABEL: add_2D:
545-
; CHECK-SD: // %bb.0:
546-
; CHECK-SD-NEXT: faddp d0, v0.2d
547-
; CHECK-SD-NEXT: mov d2, v1.d[1]
548-
; CHECK-SD-NEXT: fadd d0, d0, d1
549-
; CHECK-SD-NEXT: fadd d0, d0, d2
550-
; CHECK-SD-NEXT: ret
551-
;
552-
; CHECK-GI-LABEL: add_2D:
553-
; CHECK-GI: // %bb.0:
554-
; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
555-
; CHECK-GI-NEXT: mov d3, v0.d[1]
556-
; CHECK-GI-NEXT: fmov d2, x8
557-
; CHECK-GI-NEXT: fadd d0, d2, d0
558-
; CHECK-GI-NEXT: mov d2, v1.d[1]
559-
; CHECK-GI-NEXT: fadd d0, d0, d3
560-
; CHECK-GI-NEXT: fadd d0, d0, d1
561-
; CHECK-GI-NEXT: fadd d0, d0, d2
562-
; CHECK-GI-NEXT: ret
489+
; CHECK-LABEL: add_2D:
490+
; CHECK: // %bb.0:
491+
; CHECK-NEXT: faddp d0, v0.2d
492+
; CHECK-NEXT: mov d2, v1.d[1]
493+
; CHECK-NEXT: fadd d0, d0, d1
494+
; CHECK-NEXT: fadd d0, d0, d2
495+
; CHECK-NEXT: ret
563496
%r = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %bin.rdx)
564497
ret double %r
565498
}
566499

567500
; Added at least one test where the start value is not -0.0.
568501
define float @add_S_init_42(<4 x float> %bin.rdx) {
569-
; CHECK-SD-LABEL: add_S_init_42:
570-
; CHECK-SD: // %bb.0:
571-
; CHECK-SD-NEXT: mov w8, #1109917696 // =0x42280000
572-
; CHECK-SD-NEXT: mov s2, v0.s[1]
573-
; CHECK-SD-NEXT: mov s3, v0.s[2]
574-
; CHECK-SD-NEXT: fmov s1, w8
575-
; CHECK-SD-NEXT: fadd s1, s0, s1
576-
; CHECK-SD-NEXT: mov s0, v0.s[3]
577-
; CHECK-SD-NEXT: fadd s1, s1, s2
578-
; CHECK-SD-NEXT: fadd s1, s1, s3
579-
; CHECK-SD-NEXT: fadd s0, s1, s0
580-
; CHECK-SD-NEXT: ret
581-
;
582-
; CHECK-GI-LABEL: add_S_init_42:
583-
; CHECK-GI: // %bb.0:
584-
; CHECK-GI-NEXT: mov w8, #1109917696 // =0x42280000
585-
; CHECK-GI-NEXT: mov s2, v0.s[1]
586-
; CHECK-GI-NEXT: mov s3, v0.s[2]
587-
; CHECK-GI-NEXT: fmov s1, w8
588-
; CHECK-GI-NEXT: fadd s1, s1, s0
589-
; CHECK-GI-NEXT: mov s0, v0.s[3]
590-
; CHECK-GI-NEXT: fadd s1, s1, s2
591-
; CHECK-GI-NEXT: fadd s1, s1, s3
592-
; CHECK-GI-NEXT: fadd s0, s1, s0
593-
; CHECK-GI-NEXT: ret
502+
; CHECK-LABEL: add_S_init_42:
503+
; CHECK: // %bb.0:
504+
; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
505+
; CHECK-NEXT: mov s2, v0.s[1]
506+
; CHECK-NEXT: mov s3, v0.s[2]
507+
; CHECK-NEXT: fmov s1, w8
508+
; CHECK-NEXT: fadd s1, s0, s1
509+
; CHECK-NEXT: mov s0, v0.s[3]
510+
; CHECK-NEXT: fadd s1, s1, s2
511+
; CHECK-NEXT: fadd s1, s1, s3
512+
; CHECK-NEXT: fadd s0, s1, s0
513+
; CHECK-NEXT: ret
594514
%r = call float @llvm.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
595515
ret float %r
596516
}
@@ -604,5 +524,3 @@ declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
604524
declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
605525
declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
606526
declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
607-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
608-
; CHECK: {{.*}}

0 commit comments

Comments
 (0)