Skip to content

Commit bf92acc

Browse files
HolyMolyCowMan authored and github-actions[bot] committed
Automerge: [AArch64][GlobalISel] Legalize more G_VECREDUCE_FMIN/FMAX operations. (#159082)
Enabling scalarisation for `G_VECREDUCE_FMIN` and `G_VECREDUCE_FMAX` allows more operations, such as those on `v2fp128`, to be handled.
2 parents 5c62acb + d90a313 commit bf92acc

File tree

6 files changed

+422
-129
lines changed

6 files changed

+422
-129
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1348,6 +1348,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
13481348
.clampMaxNumElements(1, s64, 2)
13491349
.clampMaxNumElements(1, s32, 4)
13501350
.clampMaxNumElements(1, s16, 8)
1351+
.scalarize(1)
13511352
.lower();
13521353

13531354
getActionDefinitionsBuilder(G_VECREDUCE_MUL)

llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll

Lines changed: 83 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,9 @@
66

77
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
88
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
9-
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
10-
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
11-
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
129
;
1310
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
1411
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
15-
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
16-
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
17-
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
1812

1913
declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
2014
declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
@@ -557,33 +551,99 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
557551
}
558552

559553
define float @test_v3f32(<3 x float> %a) nounwind {
560-
; CHECK-LABEL: test_v3f32:
561-
; CHECK: // %bb.0:
562-
; CHECK-NEXT: mov w8, #-4194304 // =0xffc00000
563-
; CHECK-NEXT: fmov s1, w8
564-
; CHECK-NEXT: mov v0.s[3], v1.s[0]
565-
; CHECK-NEXT: fmaxnmv s0, v0.4s
566-
; CHECK-NEXT: ret
554+
; CHECK-NOFP-SD-LABEL: test_v3f32:
555+
; CHECK-NOFP-SD: // %bb.0:
556+
; CHECK-NOFP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
557+
; CHECK-NOFP-SD-NEXT: fmov s1, w8
558+
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
559+
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
560+
; CHECK-NOFP-SD-NEXT: ret
561+
;
562+
; CHECK-FP-SD-LABEL: test_v3f32:
563+
; CHECK-FP-SD: // %bb.0:
564+
; CHECK-FP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
565+
; CHECK-FP-SD-NEXT: fmov s1, w8
566+
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
567+
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
568+
; CHECK-FP-SD-NEXT: ret
569+
;
570+
; CHECK-NOFP-GI-LABEL: test_v3f32:
571+
; CHECK-NOFP-GI: // %bb.0:
572+
; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
573+
; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
574+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
575+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
576+
; CHECK-NOFP-GI-NEXT: ret
577+
;
578+
; CHECK-FP-GI-LABEL: test_v3f32:
579+
; CHECK-FP-GI: // %bb.0:
580+
; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
581+
; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
582+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
583+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
584+
; CHECK-FP-GI-NEXT: ret
567585
%b = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
568586
ret float %b
569587
}
570588

571589
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
572-
; CHECK-LABEL: test_v3f32_ninf:
573-
; CHECK: // %bb.0:
574-
; CHECK-NEXT: mov w8, #-4194304 // =0xffc00000
575-
; CHECK-NEXT: fmov s1, w8
576-
; CHECK-NEXT: mov v0.s[3], v1.s[0]
577-
; CHECK-NEXT: fmaxnmv s0, v0.4s
578-
; CHECK-NEXT: ret
590+
; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
591+
; CHECK-NOFP-SD: // %bb.0:
592+
; CHECK-NOFP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
593+
; CHECK-NOFP-SD-NEXT: fmov s1, w8
594+
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
595+
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
596+
; CHECK-NOFP-SD-NEXT: ret
597+
;
598+
; CHECK-FP-SD-LABEL: test_v3f32_ninf:
599+
; CHECK-FP-SD: // %bb.0:
600+
; CHECK-FP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
601+
; CHECK-FP-SD-NEXT: fmov s1, w8
602+
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
603+
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
604+
; CHECK-FP-SD-NEXT: ret
605+
;
606+
; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
607+
; CHECK-NOFP-GI: // %bb.0:
608+
; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
609+
; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
610+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
611+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
612+
; CHECK-NOFP-GI-NEXT: ret
613+
;
614+
; CHECK-FP-GI-LABEL: test_v3f32_ninf:
615+
; CHECK-FP-GI: // %bb.0:
616+
; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
617+
; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
618+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
619+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
620+
; CHECK-FP-GI-NEXT: ret
579621
%b = call ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
580622
ret float %b
581623
}
582624

583625
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
584-
; CHECK-LABEL: test_v2f128:
585-
; CHECK: // %bb.0:
586-
; CHECK-NEXT: b fmaxl
626+
; CHECK-NOFP-SD-LABEL: test_v2f128:
627+
; CHECK-NOFP-SD: // %bb.0:
628+
; CHECK-NOFP-SD-NEXT: b fmaxl
629+
;
630+
; CHECK-FP-SD-LABEL: test_v2f128:
631+
; CHECK-FP-SD: // %bb.0:
632+
; CHECK-FP-SD-NEXT: b fmaxl
633+
;
634+
; CHECK-NOFP-GI-LABEL: test_v2f128:
635+
; CHECK-NOFP-GI: // %bb.0:
636+
; CHECK-NOFP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
637+
; CHECK-NOFP-GI-NEXT: bl fmaxl
638+
; CHECK-NOFP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
639+
; CHECK-NOFP-GI-NEXT: ret
640+
;
641+
; CHECK-FP-GI-LABEL: test_v2f128:
642+
; CHECK-FP-GI: // %bb.0:
643+
; CHECK-FP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
644+
; CHECK-FP-GI-NEXT: bl fmaxl
645+
; CHECK-FP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
646+
; CHECK-FP-GI-NEXT: ret
587647
%b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
588648
ret fp128 %b
589649
}

llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll

Lines changed: 107 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,9 @@
66

77
; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
88
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
9-
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
10-
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
11-
; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
129
;
1310
; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
1411
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
15-
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
16-
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
17-
; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
1812

1913
declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
2014
declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
@@ -557,45 +551,123 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
557551
}
558552

559553
define float @test_v3f32(<3 x float> %a) nounwind {
560-
; CHECK-LABEL: test_v3f32:
561-
; CHECK: // %bb.0:
562-
; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
563-
; CHECK-NEXT: fmov s1, w8
564-
; CHECK-NEXT: mov v0.s[3], v1.s[0]
565-
; CHECK-NEXT: fmaxnmv s0, v0.4s
566-
; CHECK-NEXT: ret
554+
; CHECK-NOFP-SD-LABEL: test_v3f32:
555+
; CHECK-NOFP-SD: // %bb.0:
556+
; CHECK-NOFP-SD-NEXT: mov w8, #-8388608 // =0xff800000
557+
; CHECK-NOFP-SD-NEXT: fmov s1, w8
558+
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
559+
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
560+
; CHECK-NOFP-SD-NEXT: ret
561+
;
562+
; CHECK-FP-SD-LABEL: test_v3f32:
563+
; CHECK-FP-SD: // %bb.0:
564+
; CHECK-FP-SD-NEXT: mov w8, #-8388608 // =0xff800000
565+
; CHECK-FP-SD-NEXT: fmov s1, w8
566+
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
567+
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
568+
; CHECK-FP-SD-NEXT: ret
569+
;
570+
; CHECK-NOFP-GI-LABEL: test_v3f32:
571+
; CHECK-NOFP-GI: // %bb.0:
572+
; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
573+
; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
574+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
575+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
576+
; CHECK-NOFP-GI-NEXT: ret
577+
;
578+
; CHECK-FP-GI-LABEL: test_v3f32:
579+
; CHECK-FP-GI: // %bb.0:
580+
; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
581+
; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
582+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
583+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
584+
; CHECK-FP-GI-NEXT: ret
567585
%b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
568586
ret float %b
569587
}
570588

571589
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
572-
; CHECK-LABEL: test_v3f32_ninf:
573-
; CHECK: // %bb.0:
574-
; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff
575-
; CHECK-NEXT: fmov s1, w8
576-
; CHECK-NEXT: mov v0.s[3], v1.s[0]
577-
; CHECK-NEXT: fmaxnmv s0, v0.4s
578-
; CHECK-NEXT: ret
590+
; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
591+
; CHECK-NOFP-SD: // %bb.0:
592+
; CHECK-NOFP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
593+
; CHECK-NOFP-SD-NEXT: fmov s1, w8
594+
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
595+
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
596+
; CHECK-NOFP-SD-NEXT: ret
597+
;
598+
; CHECK-FP-SD-LABEL: test_v3f32_ninf:
599+
; CHECK-FP-SD: // %bb.0:
600+
; CHECK-FP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
601+
; CHECK-FP-SD-NEXT: fmov s1, w8
602+
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
603+
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
604+
; CHECK-FP-SD-NEXT: ret
605+
;
606+
; CHECK-NOFP-GI-LABEL: test_v3f32_ninf:
607+
; CHECK-NOFP-GI: // %bb.0:
608+
; CHECK-NOFP-GI-NEXT: mov s1, v0.s[1]
609+
; CHECK-NOFP-GI-NEXT: mov s2, v0.s[2]
610+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s1
611+
; CHECK-NOFP-GI-NEXT: fmaxnm s0, s0, s2
612+
; CHECK-NOFP-GI-NEXT: ret
613+
;
614+
; CHECK-FP-GI-LABEL: test_v3f32_ninf:
615+
; CHECK-FP-GI: // %bb.0:
616+
; CHECK-FP-GI-NEXT: mov s1, v0.s[1]
617+
; CHECK-FP-GI-NEXT: mov s2, v0.s[2]
618+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s1
619+
; CHECK-FP-GI-NEXT: fmaxnm s0, s0, s2
620+
; CHECK-FP-GI-NEXT: ret
579621
%b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
580622
ret float %b
581623
}
582624

583625
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
584-
; CHECK-LABEL: test_v2f128:
585-
; CHECK: // %bb.0:
586-
; CHECK-NEXT: sub sp, sp, #48
587-
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
588-
; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
589-
; CHECK-NEXT: bl __gttf2
590-
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
591-
; CHECK-NEXT: cmp w0, #0
592-
; CHECK-NEXT: b.le .LBB18_2
593-
; CHECK-NEXT: // %bb.1:
594-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
595-
; CHECK-NEXT: .LBB18_2:
596-
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
597-
; CHECK-NEXT: add sp, sp, #48
598-
; CHECK-NEXT: ret
626+
; CHECK-NOFP-SD-LABEL: test_v2f128:
627+
; CHECK-NOFP-SD: // %bb.0:
628+
; CHECK-NOFP-SD-NEXT: sub sp, sp, #48
629+
; CHECK-NOFP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
630+
; CHECK-NOFP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
631+
; CHECK-NOFP-SD-NEXT: bl __gttf2
632+
; CHECK-NOFP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
633+
; CHECK-NOFP-SD-NEXT: cmp w0, #0
634+
; CHECK-NOFP-SD-NEXT: b.le .LBB18_2
635+
; CHECK-NOFP-SD-NEXT: // %bb.1:
636+
; CHECK-NOFP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
637+
; CHECK-NOFP-SD-NEXT: .LBB18_2:
638+
; CHECK-NOFP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
639+
; CHECK-NOFP-SD-NEXT: add sp, sp, #48
640+
; CHECK-NOFP-SD-NEXT: ret
641+
;
642+
; CHECK-FP-SD-LABEL: test_v2f128:
643+
; CHECK-FP-SD: // %bb.0:
644+
; CHECK-FP-SD-NEXT: sub sp, sp, #48
645+
; CHECK-FP-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
646+
; CHECK-FP-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
647+
; CHECK-FP-SD-NEXT: bl __gttf2
648+
; CHECK-FP-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
649+
; CHECK-FP-SD-NEXT: cmp w0, #0
650+
; CHECK-FP-SD-NEXT: b.le .LBB18_2
651+
; CHECK-FP-SD-NEXT: // %bb.1:
652+
; CHECK-FP-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
653+
; CHECK-FP-SD-NEXT: .LBB18_2:
654+
; CHECK-FP-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
655+
; CHECK-FP-SD-NEXT: add sp, sp, #48
656+
; CHECK-FP-SD-NEXT: ret
657+
;
658+
; CHECK-NOFP-GI-LABEL: test_v2f128:
659+
; CHECK-NOFP-GI: // %bb.0:
660+
; CHECK-NOFP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
661+
; CHECK-NOFP-GI-NEXT: bl fmaxl
662+
; CHECK-NOFP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
663+
; CHECK-NOFP-GI-NEXT: ret
664+
;
665+
; CHECK-FP-GI-LABEL: test_v2f128:
666+
; CHECK-FP-GI: // %bb.0:
667+
; CHECK-FP-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
668+
; CHECK-FP-GI-NEXT: bl fmaxl
669+
; CHECK-FP-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
670+
; CHECK-FP-GI-NEXT: ret
599671
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
600672
ret fp128 %b
601673
}

0 commit comments

Comments
 (0)