Skip to content

Commit c6f45f5

Browse files
wzssyqa and Your Name authored
PowerPC/VSX: Select FMINNUM and FMAXNUM (#135739)
In LangRef, we claim that FMINNUM and FMAXNUM should follow the minNum and maxNum operations of IEEE 754-2008. PowerPC/VSX has instructions that implement them: XSMINDP and XSMAXDP. For now we still use FMINNUM_IEEE and FMAXNUM_IEEE as well, since they are currently used by the target-independent expansion code. In the future, we may replace all uses of FMINNUM_IEEE/FMAXNUM_IEEE with FMINNUM and FMAXNUM. --------- Co-authored-by: Your Name <[email protected]>
1 parent 4125e73 commit c6f45f5

File tree

4 files changed

+291
-170
lines changed

4 files changed

+291
-170
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
794794
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
795795
setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
796796
setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
797+
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
798+
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
799+
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
800+
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
797801
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Legal);
798802
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Legal);
799803
}
@@ -828,8 +832,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
828832
}
829833

830834
if (Subtarget.hasVSX()) {
835+
setOperationAction(ISD::FMAXNUM_IEEE, VT, Legal);
836+
setOperationAction(ISD::FMINNUM_IEEE, VT, Legal);
831837
setOperationAction(ISD::FMAXNUM, VT, Legal);
832838
setOperationAction(ISD::FMINNUM, VT, Legal);
839+
setOperationAction(ISD::FCANONICALIZE, VT, Legal);
833840
}
834841

835842
// Vector instructions introduced in P8

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2757,14 +2757,26 @@ def : Pat<(v1i128 (vselect v1i128:$vA, v1i128:$vB, v1i128:$vC)),
27572757
(COPY_TO_REGCLASS $vB, VSRC),
27582758
(COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
27592759

2760+
def : Pat<(v4f32 (fmaxnum_ieee v4f32:$src1, v4f32:$src2)),
2761+
(v4f32 (XVMAXSP $src1, $src2))>;
27602762
def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
27612763
(v4f32 (XVMAXSP $src1, $src2))>;
2764+
def : Pat<(v4f32 (fminnum_ieee v4f32:$src1, v4f32:$src2)),
2765+
(v4f32 (XVMINSP $src1, $src2))>;
27622766
def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
27632767
(v4f32 (XVMINSP $src1, $src2))>;
2768+
def : Pat<(v4f32 (fcanonicalize v4f32:$src1)),
2769+
(v4f32 (XVMAXSP $src1, $src1))>;
2770+
def : Pat<(v2f64 (fmaxnum_ieee v2f64:$src1, v2f64:$src2)),
2771+
(v2f64 (XVMAXDP $src1, $src2))>;
27642772
def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
27652773
(v2f64 (XVMAXDP $src1, $src2))>;
2774+
def : Pat<(v2f64 (fminnum_ieee v2f64:$src1, v2f64:$src2)),
2775+
(v2f64 (XVMINDP $src1, $src2))>;
27662776
def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
27672777
(v2f64 (XVMINDP $src1, $src2))>;
2778+
def : Pat<(v2f64 (fcanonicalize v2f64:$src1)),
2779+
(v2f64 (XVMAXDP $src1, $src1))>;
27682780

27692781
// f32 abs
27702782
def : Pat<(f32 (fabs f32:$S)),
@@ -2776,43 +2788,29 @@ def : Pat<(f32 (fneg (fabs f32:$S))),
27762788
(f32 (COPY_TO_REGCLASS (XSNABSDP
27772789
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
27782790

2779-
// f32 Min.
2791+
// Max and Min
27802792
def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
27812793
(f32 FpMinMax.F32Min)>;
2782-
def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
2783-
(f32 FpMinMax.F32Min)>;
2784-
def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
2785-
(f32 FpMinMax.F32Min)>;
2786-
def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
2787-
(f32 FpMinMax.F32Min)>;
2788-
// F32 Max.
27892794
def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
27902795
(f32 FpMinMax.F32Max)>;
2791-
def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
2792-
(f32 FpMinMax.F32Max)>;
2793-
def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
2794-
(f32 FpMinMax.F32Max)>;
2795-
def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
2796+
def : Pat<(f32 (fminnum f32:$A, f32:$B)),
2797+
(f32 FpMinMax.F32Min)>;
2798+
def : Pat<(f32 (fmaxnum f32:$A, f32:$B)),
27962799
(f32 FpMinMax.F32Max)>;
2797-
2798-
// f64 Min.
2800+
def : Pat<(f32 (fcanonicalize f32:$A)),
2801+
(f32 (COPY_TO_REGCLASS (XSMAXDP
2802+
(COPY_TO_REGCLASS $A, VSFRC),
2803+
(COPY_TO_REGCLASS $A, VSFRC)), VSSRC))>;
27992804
def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
28002805
(f64 (XSMINDP $A, $B))>;
2801-
def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
2802-
(f64 (XSMINDP $A, $B))>;
2803-
def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
2804-
(f64 (XSMINDP $A, $B))>;
2805-
def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
2806-
(f64 (XSMINDP $A, $B))>;
2807-
// f64 Max.
28082806
def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
28092807
(f64 (XSMAXDP $A, $B))>;
2810-
def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
2811-
(f64 (XSMAXDP $A, $B))>;
2812-
def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
2813-
(f64 (XSMAXDP $A, $B))>;
2814-
def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
2808+
def : Pat<(f64 (fminnum f64:$A, f64:$B)),
2809+
(f64 (XSMINDP $A, $B))>;
2810+
def : Pat<(f64 (fmaxnum f64:$A, f64:$B)),
28152811
(f64 (XSMAXDP $A, $B))>;
2812+
def : Pat<(f64 (fcanonicalize f64:$A)),
2813+
(f64 (XSMAXDP $A, $A))>;
28162814

28172815
def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst),
28182816
(STXVD2X $rS, ForceXForm:$dst)>;

llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll

Lines changed: 34 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -301,44 +301,26 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
301301
; VSX-NEXT: xvcmpeqsp 1, 35, 35
302302
; VSX-NEXT: xvcmpeqsp 2, 34, 34
303303
; VSX-NEXT: addis 3, 2, .LCPI4_0@toc@ha
304-
; VSX-NEXT: xxleqv 36, 36, 36
305-
; VSX-NEXT: xvminsp 0, 34, 35
306-
; VSX-NEXT: vslw 4, 4, 4
307304
; VSX-NEXT: addi 3, 3, .LCPI4_0@toc@l
308305
; VSX-NEXT: xxlnor 1, 1, 1
309306
; VSX-NEXT: xxlnor 2, 2, 2
310-
; VSX-NEXT: vcmpequw 5, 2, 4
307+
; VSX-NEXT: xvminsp 0, 34, 35
311308
; VSX-NEXT: xxlor 1, 2, 1
312309
; VSX-NEXT: lxvd2x 2, 0, 3
313-
; VSX-NEXT: xxsel 0, 0, 2, 1
314-
; VSX-NEXT: xxlxor 2, 2, 2
315-
; VSX-NEXT: xvcmpeqsp 2, 0, 2
316-
; VSX-NEXT: xxsel 1, 0, 34, 37
317-
; VSX-NEXT: vcmpequw 2, 3, 4
318-
; VSX-NEXT: xxsel 1, 1, 35, 34
319-
; VSX-NEXT: xxsel 34, 0, 1, 2
310+
; VSX-NEXT: xxsel 34, 0, 2, 1
320311
; VSX-NEXT: blr
321312
;
322313
; AIX-LABEL: v4f32_minimum:
323314
; AIX: # %bb.0: # %entry
324315
; AIX-NEXT: xvcmpeqsp 1, 35, 35
325316
; AIX-NEXT: xvcmpeqsp 2, 34, 34
326317
; AIX-NEXT: ld 3, L..C4(2) # %const.0
327-
; AIX-NEXT: xxleqv 36, 36, 36
328318
; AIX-NEXT: xvminsp 0, 34, 35
329-
; AIX-NEXT: vslw 4, 4, 4
330319
; AIX-NEXT: xxlnor 1, 1, 1
331320
; AIX-NEXT: xxlnor 2, 2, 2
332-
; AIX-NEXT: vcmpequw 5, 2, 4
333321
; AIX-NEXT: xxlor 1, 2, 1
334322
; AIX-NEXT: lxvw4x 2, 0, 3
335-
; AIX-NEXT: xxsel 0, 0, 2, 1
336-
; AIX-NEXT: xxlxor 2, 2, 2
337-
; AIX-NEXT: xvcmpeqsp 2, 0, 2
338-
; AIX-NEXT: xxsel 1, 0, 34, 37
339-
; AIX-NEXT: vcmpequw 2, 3, 4
340-
; AIX-NEXT: xxsel 1, 1, 35, 34
341-
; AIX-NEXT: xxsel 34, 0, 1, 2
323+
; AIX-NEXT: xxsel 34, 0, 2, 1
342324
; AIX-NEXT: blr
343325
entry:
344326
%m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -377,16 +359,9 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
377359
; VSX-NEXT: xxlnor 1, 1, 1
378360
; VSX-NEXT: xxlnor 2, 2, 2
379361
; VSX-NEXT: xvmaxsp 0, 34, 35
380-
; VSX-NEXT: xxlxor 36, 36, 36
381-
; VSX-NEXT: vcmpequw 5, 2, 4
382362
; VSX-NEXT: xxlor 1, 2, 1
383363
; VSX-NEXT: lxvd2x 2, 0, 3
384-
; VSX-NEXT: xxsel 0, 0, 2, 1
385-
; VSX-NEXT: xvcmpeqsp 2, 0, 36
386-
; VSX-NEXT: xxsel 1, 0, 34, 37
387-
; VSX-NEXT: vcmpequw 2, 3, 4
388-
; VSX-NEXT: xxsel 1, 1, 35, 34
389-
; VSX-NEXT: xxsel 34, 0, 1, 2
364+
; VSX-NEXT: xxsel 34, 0, 2, 1
390365
; VSX-NEXT: blr
391366
;
392367
; AIX-LABEL: v4f32_maximum:
@@ -395,18 +370,11 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
395370
; AIX-NEXT: xvcmpeqsp 2, 34, 34
396371
; AIX-NEXT: ld 3, L..C5(2) # %const.0
397372
; AIX-NEXT: xvmaxsp 0, 34, 35
398-
; AIX-NEXT: xxlxor 36, 36, 36
399373
; AIX-NEXT: xxlnor 1, 1, 1
400374
; AIX-NEXT: xxlnor 2, 2, 2
401-
; AIX-NEXT: vcmpequw 5, 2, 4
402375
; AIX-NEXT: xxlor 1, 2, 1
403376
; AIX-NEXT: lxvw4x 2, 0, 3
404-
; AIX-NEXT: xxsel 0, 0, 2, 1
405-
; AIX-NEXT: xvcmpeqsp 2, 0, 36
406-
; AIX-NEXT: xxsel 1, 0, 34, 37
407-
; AIX-NEXT: vcmpequw 2, 3, 4
408-
; AIX-NEXT: xxsel 1, 1, 35, 34
409-
; AIX-NEXT: xxsel 34, 0, 1, 2
377+
; AIX-NEXT: xxsel 34, 0, 2, 1
410378
; AIX-NEXT: blr
411379
entry:
412380
%m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -493,47 +461,28 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
493461
; VSX-LABEL: v2f64_minimum:
494462
; VSX: # %bb.0: # %entry
495463
; VSX-NEXT: addis 3, 2, .LCPI6_0@toc@ha
496-
; VSX-NEXT: xvcmpeqdp 36, 35, 35
497-
; VSX-NEXT: xvcmpeqdp 37, 34, 34
498-
; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l
499-
; VSX-NEXT: xxlnor 36, 36, 36
500-
; VSX-NEXT: xxlnor 37, 37, 37
501464
; VSX-NEXT: xvmindp 0, 34, 35
465+
; VSX-NEXT: xvcmpeqdp 35, 35, 35
466+
; VSX-NEXT: addi 3, 3, .LCPI6_0@toc@l
467+
; VSX-NEXT: xvcmpeqdp 34, 34, 34
468+
; VSX-NEXT: xxlnor 35, 35, 35
469+
; VSX-NEXT: xxlnor 34, 34, 34
502470
; VSX-NEXT: lxvd2x 2, 0, 3
503-
; VSX-NEXT: addis 3, 2, .LCPI6_1@toc@ha
504-
; VSX-NEXT: xxlor 1, 37, 36
505-
; VSX-NEXT: addi 3, 3, .LCPI6_1@toc@l
506-
; VSX-NEXT: lxvd2x 36, 0, 3
507-
; VSX-NEXT: vcmpequd 5, 2, 4
508-
; VSX-NEXT: xxsel 0, 0, 2, 1
509-
; VSX-NEXT: xxlxor 2, 2, 2
510-
; VSX-NEXT: xxsel 1, 0, 34, 37
511-
; VSX-NEXT: vcmpequd 2, 3, 4
512-
; VSX-NEXT: xxsel 1, 1, 35, 34
513-
; VSX-NEXT: xvcmpeqdp 34, 0, 2
514-
; VSX-NEXT: xxsel 34, 0, 1, 34
471+
; VSX-NEXT: xxlor 1, 34, 35
472+
; VSX-NEXT: xxsel 34, 0, 2, 1
515473
; VSX-NEXT: blr
516474
;
517475
; AIX-LABEL: v2f64_minimum:
518476
; AIX: # %bb.0: # %entry
519477
; AIX-NEXT: ld 3, L..C6(2) # %const.0
520-
; AIX-NEXT: xvcmpeqdp 36, 35, 35
521-
; AIX-NEXT: xvcmpeqdp 37, 34, 34
522-
; AIX-NEXT: lxvd2x 2, 0, 3
523-
; AIX-NEXT: ld 3, L..C7(2) # %const.1
524-
; AIX-NEXT: xxlnor 36, 36, 36
525-
; AIX-NEXT: xxlnor 37, 37, 37
526478
; AIX-NEXT: xvmindp 0, 34, 35
527-
; AIX-NEXT: xxlor 1, 37, 36
528-
; AIX-NEXT: lxvd2x 36, 0, 3
529-
; AIX-NEXT: vcmpequd 5, 2, 4
530-
; AIX-NEXT: xxsel 0, 0, 2, 1
531-
; AIX-NEXT: xxlxor 2, 2, 2
532-
; AIX-NEXT: xxsel 1, 0, 34, 37
533-
; AIX-NEXT: vcmpequd 2, 3, 4
534-
; AIX-NEXT: xxsel 1, 1, 35, 34
535-
; AIX-NEXT: xvcmpeqdp 34, 0, 2
536-
; AIX-NEXT: xxsel 34, 0, 1, 34
479+
; AIX-NEXT: xvcmpeqdp 35, 35, 35
480+
; AIX-NEXT: lxvd2x 2, 0, 3
481+
; AIX-NEXT: xvcmpeqdp 34, 34, 34
482+
; AIX-NEXT: xxlnor 35, 35, 35
483+
; AIX-NEXT: xxlnor 34, 34, 34
484+
; AIX-NEXT: xxlor 1, 34, 35
485+
; AIX-NEXT: xxsel 34, 0, 2, 1
537486
; AIX-NEXT: blr
538487
entry:
539488
%m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
@@ -618,42 +567,28 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
618567
; VSX-LABEL: v2f64_maximum:
619568
; VSX: # %bb.0: # %entry
620569
; VSX-NEXT: addis 3, 2, .LCPI7_0@toc@ha
621-
; VSX-NEXT: xvcmpeqdp 36, 35, 35
622-
; VSX-NEXT: xvcmpeqdp 37, 34, 34
623-
; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l
624-
; VSX-NEXT: xxlnor 36, 36, 36
625-
; VSX-NEXT: xxlnor 37, 37, 37
626570
; VSX-NEXT: xvmaxdp 0, 34, 35
571+
; VSX-NEXT: xvcmpeqdp 35, 35, 35
572+
; VSX-NEXT: addi 3, 3, .LCPI7_0@toc@l
573+
; VSX-NEXT: xvcmpeqdp 34, 34, 34
574+
; VSX-NEXT: xxlnor 35, 35, 35
575+
; VSX-NEXT: xxlnor 34, 34, 34
627576
; VSX-NEXT: lxvd2x 2, 0, 3
628-
; VSX-NEXT: xxlor 1, 37, 36
629-
; VSX-NEXT: xxlxor 36, 36, 36
630-
; VSX-NEXT: vcmpequd 5, 2, 4
631-
; VSX-NEXT: xxsel 0, 0, 2, 1
632-
; VSX-NEXT: xxsel 1, 0, 34, 37
633-
; VSX-NEXT: vcmpequd 2, 3, 4
634-
; VSX-NEXT: xxsel 1, 1, 35, 34
635-
; VSX-NEXT: xvcmpeqdp 34, 0, 36
636-
; VSX-NEXT: xxsel 34, 0, 1, 34
577+
; VSX-NEXT: xxlor 1, 34, 35
578+
; VSX-NEXT: xxsel 34, 0, 2, 1
637579
; VSX-NEXT: blr
638580
;
639581
; AIX-LABEL: v2f64_maximum:
640582
; AIX: # %bb.0: # %entry
641-
; AIX-NEXT: ld 3, L..C8(2) # %const.0
642-
; AIX-NEXT: xvcmpeqdp 36, 35, 35
643-
; AIX-NEXT: xvcmpeqdp 37, 34, 34
644-
; AIX-NEXT: lxvd2x 2, 0, 3
645-
; AIX-NEXT: xxlnor 36, 36, 36
646-
; AIX-NEXT: xxlnor 37, 37, 37
583+
; AIX-NEXT: ld 3, L..C7(2) # %const.0
647584
; AIX-NEXT: xvmaxdp 0, 34, 35
648-
; AIX-NEXT: xxlor 1, 37, 36
649-
; AIX-NEXT: xxlxor 36, 36, 36
650-
; AIX-NEXT: vcmpequd 5, 2, 4
651-
; AIX-NEXT: xxsel 0, 0, 2, 1
652-
; AIX-NEXT: xxsel 1, 0, 34, 37
653-
; AIX-NEXT: vcmpequd 2, 3, 4
654-
; AIX-NEXT: xxsel 1, 1, 35, 34
655-
; AIX-NEXT: xvcmpeqdp 34, 0, 36
656-
; AIX-NEXT: xxsel 34, 0, 1, 34
585+
; AIX-NEXT: xvcmpeqdp 35, 35, 35
586+
; AIX-NEXT: lxvd2x 2, 0, 3
587+
; AIX-NEXT: xvcmpeqdp 34, 34, 34
588+
; AIX-NEXT: xxlnor 35, 35, 35
589+
; AIX-NEXT: xxlnor 34, 34, 34
590+
; AIX-NEXT: xxlor 1, 34, 35
591+
; AIX-NEXT: xxsel 34, 0, 2, 1
657592
; AIX-NEXT: blr
658593
entry:
659594
%m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)

0 commit comments

Comments
 (0)