Skip to content

Commit af0c523

Browse files
committed
Update bf16-combines checks
1 parent 91f71a6 commit af0c523

File tree

1 file changed: 80 additions and 118 deletions

1 file changed

+80
-118
lines changed

llvm/test/CodeGen/AArch64/sve-bf16-combines.ll

Lines changed: 80 additions & 118 deletions
Original file line number | Diff line number | Diff line change
@@ -414,28 +414,21 @@ define <vscale x 8 x bfloat> @fsub_sel_negzero_nxv8bf16(<vscale x 8 x bfloat> %a
414414
define <vscale x 8 x bfloat> @fadd_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
415415
; SVE-LABEL: fadd_sel_fmul_nxv8bf16:
416416
; SVE: // %bb.0:
417-
; SVE-NEXT: uunpkhi z3.s, z2.h
418-
; SVE-NEXT: uunpkhi z4.s, z1.h
419-
; SVE-NEXT: uunpklo z2.s, z2.h
420-
; SVE-NEXT: uunpklo z1.s, z1.h
417+
; SVE-NEXT: mov z3.s, #0x80000000
418+
; SVE-NEXT: mov z4.s, #0x80000000
421419
; SVE-NEXT: ptrue p1.s
422-
; SVE-NEXT: lsl z3.s, z3.s, #16
423-
; SVE-NEXT: lsl z4.s, z4.s, #16
424-
; SVE-NEXT: lsl z2.s, z2.s, #16
425-
; SVE-NEXT: lsl z1.s, z1.s, #16
426-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
427-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
428-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
429-
; SVE-NEXT: movi v3.2d, #0000000000000000
430-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
431-
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
432-
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
420+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
421+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
422+
; SVE-NEXT: movi v2.2d, #0000000000000000
423+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
433424
; SVE-NEXT: uunpkhi z3.s, z0.h
434425
; SVE-NEXT: uunpklo z0.s, z0.h
435-
; SVE-NEXT: uunpkhi z2.s, z1.h
436-
; SVE-NEXT: uunpklo z1.s, z1.h
426+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
437427
; SVE-NEXT: lsl z3.s, z3.s, #16
438428
; SVE-NEXT: lsl z0.s, z0.s, #16
429+
; SVE-NEXT: sel z1.h, p0, z1.h, z2.h
430+
; SVE-NEXT: uunpkhi z2.s, z1.h
431+
; SVE-NEXT: uunpklo z1.s, z1.h
439432
; SVE-NEXT: lsl z2.s, z2.s, #16
440433
; SVE-NEXT: lsl z1.s, z1.s, #16
441434
; SVE-NEXT: fadd z2.s, z3.s, z2.s
@@ -461,24 +454,20 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <
461454
define <vscale x 8 x bfloat> @fsub_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
462455
; SVE-LABEL: fsub_sel_fmul_nxv8bf16:
463456
; SVE: // %bb.0:
464-
; SVE-NEXT: uunpkhi z3.s, z2.h
465-
; SVE-NEXT: uunpkhi z4.s, z1.h
466-
; SVE-NEXT: uunpklo z2.s, z2.h
467-
; SVE-NEXT: uunpklo z1.s, z1.h
457+
; SVE-NEXT: mov z3.s, #0x80000000
458+
; SVE-NEXT: mov z4.s, #0x80000000
468459
; SVE-NEXT: ptrue p1.s
469-
; SVE-NEXT: lsl z3.s, z3.s, #16
470-
; SVE-NEXT: lsl z4.s, z4.s, #16
471-
; SVE-NEXT: lsl z2.s, z2.s, #16
472-
; SVE-NEXT: lsl z1.s, z1.s, #16
473-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
474-
; SVE-NEXT: uunpklo z4.s, z0.h
475-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
476-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
460+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
461+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
462+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
477463
; SVE-NEXT: uunpkhi z3.s, z0.h
464+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
465+
; SVE-NEXT: uunpklo z4.s, z0.h
466+
; SVE-NEXT: lsl z3.s, z3.s, #16
467+
; SVE-NEXT: uunpkhi z2.s, z1.h
468+
; SVE-NEXT: uunpklo z1.s, z1.h
478469
; SVE-NEXT: lsl z4.s, z4.s, #16
479-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
480470
; SVE-NEXT: lsl z2.s, z2.s, #16
481-
; SVE-NEXT: lsl z3.s, z3.s, #16
482471
; SVE-NEXT: lsl z1.s, z1.s, #16
483472
; SVE-NEXT: fsub z2.s, z3.s, z2.s
484473
; SVE-NEXT: fsub z1.s, z4.s, z1.s
@@ -503,24 +492,20 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <
503492
define <vscale x 8 x bfloat> @fadd_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
504493
; SVE-LABEL: fadd_sel_fmul_nsz_nxv8bf16:
505494
; SVE: // %bb.0:
506-
; SVE-NEXT: uunpkhi z3.s, z2.h
507-
; SVE-NEXT: uunpkhi z4.s, z1.h
508-
; SVE-NEXT: uunpklo z2.s, z2.h
509-
; SVE-NEXT: uunpklo z1.s, z1.h
495+
; SVE-NEXT: mov z3.s, #0x80000000
496+
; SVE-NEXT: mov z4.s, #0x80000000
510497
; SVE-NEXT: ptrue p1.s
511-
; SVE-NEXT: lsl z3.s, z3.s, #16
512-
; SVE-NEXT: lsl z4.s, z4.s, #16
513-
; SVE-NEXT: lsl z2.s, z2.s, #16
514-
; SVE-NEXT: lsl z1.s, z1.s, #16
515-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
516-
; SVE-NEXT: uunpklo z4.s, z0.h
517-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
518-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
498+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
499+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
500+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
519501
; SVE-NEXT: uunpkhi z3.s, z0.h
502+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
503+
; SVE-NEXT: uunpklo z4.s, z0.h
504+
; SVE-NEXT: lsl z3.s, z3.s, #16
505+
; SVE-NEXT: uunpkhi z2.s, z1.h
506+
; SVE-NEXT: uunpklo z1.s, z1.h
520507
; SVE-NEXT: lsl z4.s, z4.s, #16
521-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
522508
; SVE-NEXT: lsl z2.s, z2.s, #16
523-
; SVE-NEXT: lsl z3.s, z3.s, #16
524509
; SVE-NEXT: lsl z1.s, z1.s, #16
525510
; SVE-NEXT: fadd z2.s, z3.s, z2.s
526511
; SVE-NEXT: fadd z1.s, z4.s, z1.s
@@ -545,24 +530,20 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
545530
define <vscale x 8 x bfloat> @fsub_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
546531
; SVE-LABEL: fsub_sel_fmul_nsz_nxv8bf16:
547532
; SVE: // %bb.0:
548-
; SVE-NEXT: uunpkhi z3.s, z2.h
549-
; SVE-NEXT: uunpkhi z4.s, z1.h
550-
; SVE-NEXT: uunpklo z2.s, z2.h
551-
; SVE-NEXT: uunpklo z1.s, z1.h
533+
; SVE-NEXT: mov z3.s, #0x80000000
534+
; SVE-NEXT: mov z4.s, #0x80000000
552535
; SVE-NEXT: ptrue p1.s
553-
; SVE-NEXT: lsl z3.s, z3.s, #16
554-
; SVE-NEXT: lsl z4.s, z4.s, #16
555-
; SVE-NEXT: lsl z2.s, z2.s, #16
556-
; SVE-NEXT: lsl z1.s, z1.s, #16
557-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
558-
; SVE-NEXT: uunpklo z4.s, z0.h
559-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
560-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
536+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
537+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
538+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
561539
; SVE-NEXT: uunpkhi z3.s, z0.h
540+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
541+
; SVE-NEXT: uunpklo z4.s, z0.h
542+
; SVE-NEXT: lsl z3.s, z3.s, #16
543+
; SVE-NEXT: uunpkhi z2.s, z1.h
544+
; SVE-NEXT: uunpklo z1.s, z1.h
562545
; SVE-NEXT: lsl z4.s, z4.s, #16
563-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
564546
; SVE-NEXT: lsl z2.s, z2.s, #16
565-
; SVE-NEXT: lsl z3.s, z3.s, #16
566547
; SVE-NEXT: lsl z1.s, z1.s, #16
567548
; SVE-NEXT: fsub z2.s, z3.s, z2.s
568549
; SVE-NEXT: fsub z1.s, z4.s, z1.s
@@ -587,24 +568,20 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
587568
define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
588569
; SVE-LABEL: fadd_sel_fmul_negzero_nxv8bf16:
589570
; SVE: // %bb.0:
590-
; SVE-NEXT: uunpkhi z3.s, z2.h
591-
; SVE-NEXT: uunpkhi z4.s, z1.h
592-
; SVE-NEXT: uunpklo z2.s, z2.h
593-
; SVE-NEXT: uunpklo z1.s, z1.h
571+
; SVE-NEXT: mov z3.s, #0x80000000
572+
; SVE-NEXT: mov z4.s, #0x80000000
594573
; SVE-NEXT: ptrue p1.s
595-
; SVE-NEXT: lsl z3.s, z3.s, #16
596-
; SVE-NEXT: lsl z4.s, z4.s, #16
597-
; SVE-NEXT: lsl z2.s, z2.s, #16
598-
; SVE-NEXT: lsl z1.s, z1.s, #16
599-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
600-
; SVE-NEXT: uunpklo z4.s, z0.h
601-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
602-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
574+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
575+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
576+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
603577
; SVE-NEXT: uunpkhi z3.s, z0.h
578+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
579+
; SVE-NEXT: uunpklo z4.s, z0.h
580+
; SVE-NEXT: lsl z3.s, z3.s, #16
581+
; SVE-NEXT: uunpkhi z2.s, z1.h
582+
; SVE-NEXT: uunpklo z1.s, z1.h
604583
; SVE-NEXT: lsl z4.s, z4.s, #16
605-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
606584
; SVE-NEXT: lsl z2.s, z2.s, #16
607-
; SVE-NEXT: lsl z3.s, z3.s, #16
608585
; SVE-NEXT: lsl z1.s, z1.s, #16
609586
; SVE-NEXT: fadd z2.s, z3.s, z2.s
610587
; SVE-NEXT: fadd z1.s, z4.s, z1.s
@@ -630,28 +607,21 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
630607
define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
631608
; SVE-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
632609
; SVE: // %bb.0:
633-
; SVE-NEXT: uunpkhi z3.s, z2.h
634-
; SVE-NEXT: uunpkhi z4.s, z1.h
635-
; SVE-NEXT: uunpklo z2.s, z2.h
636-
; SVE-NEXT: uunpklo z1.s, z1.h
610+
; SVE-NEXT: mov z3.s, #0x80000000
611+
; SVE-NEXT: mov z4.s, #0x80000000
637612
; SVE-NEXT: ptrue p1.s
638-
; SVE-NEXT: lsl z3.s, z3.s, #16
639-
; SVE-NEXT: lsl z4.s, z4.s, #16
640-
; SVE-NEXT: lsl z2.s, z2.s, #16
641-
; SVE-NEXT: lsl z1.s, z1.s, #16
642-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
643-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
644-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
645-
; SVE-NEXT: dupm z3.h, #0x8000
646-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
647-
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
648-
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
613+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
614+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
615+
; SVE-NEXT: dupm z2.h, #0x8000
616+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
649617
; SVE-NEXT: uunpkhi z3.s, z0.h
650618
; SVE-NEXT: uunpklo z0.s, z0.h
651-
; SVE-NEXT: uunpkhi z2.s, z1.h
652-
; SVE-NEXT: uunpklo z1.s, z1.h
619+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
653620
; SVE-NEXT: lsl z3.s, z3.s, #16
654621
; SVE-NEXT: lsl z0.s, z0.s, #16
622+
; SVE-NEXT: sel z1.h, p0, z1.h, z2.h
623+
; SVE-NEXT: uunpkhi z2.s, z1.h
624+
; SVE-NEXT: uunpklo z1.s, z1.h
655625
; SVE-NEXT: lsl z2.s, z2.s, #16
656626
; SVE-NEXT: lsl z1.s, z1.s, #16
657627
; SVE-NEXT: fsub z2.s, z3.s, z2.s
@@ -678,24 +648,20 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
678648
define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
679649
; SVE-LABEL: fadd_sel_fmul_negzero_nsz_nxv8bf16:
680650
; SVE: // %bb.0:
681-
; SVE-NEXT: uunpkhi z3.s, z2.h
682-
; SVE-NEXT: uunpkhi z4.s, z1.h
683-
; SVE-NEXT: uunpklo z2.s, z2.h
684-
; SVE-NEXT: uunpklo z1.s, z1.h
651+
; SVE-NEXT: mov z3.s, #0x80000000
652+
; SVE-NEXT: mov z4.s, #0x80000000
685653
; SVE-NEXT: ptrue p1.s
686-
; SVE-NEXT: lsl z3.s, z3.s, #16
687-
; SVE-NEXT: lsl z4.s, z4.s, #16
688-
; SVE-NEXT: lsl z2.s, z2.s, #16
689-
; SVE-NEXT: lsl z1.s, z1.s, #16
690-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
691-
; SVE-NEXT: uunpklo z4.s, z0.h
692-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
693-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
654+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
655+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
656+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
694657
; SVE-NEXT: uunpkhi z3.s, z0.h
658+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
659+
; SVE-NEXT: uunpklo z4.s, z0.h
660+
; SVE-NEXT: lsl z3.s, z3.s, #16
661+
; SVE-NEXT: uunpkhi z2.s, z1.h
662+
; SVE-NEXT: uunpklo z1.s, z1.h
695663
; SVE-NEXT: lsl z4.s, z4.s, #16
696-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
697664
; SVE-NEXT: lsl z2.s, z2.s, #16
698-
; SVE-NEXT: lsl z3.s, z3.s, #16
699665
; SVE-NEXT: lsl z1.s, z1.s, #16
700666
; SVE-NEXT: fadd z2.s, z3.s, z2.s
701667
; SVE-NEXT: fadd z1.s, z4.s, z1.s
@@ -721,24 +687,20 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x b
721687
define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
722688
; SVE-LABEL: fsub_sel_fmul_negzero_nsz_nxv8bf16:
723689
; SVE: // %bb.0:
724-
; SVE-NEXT: uunpkhi z3.s, z2.h
725-
; SVE-NEXT: uunpkhi z4.s, z1.h
726-
; SVE-NEXT: uunpklo z2.s, z2.h
727-
; SVE-NEXT: uunpklo z1.s, z1.h
690+
; SVE-NEXT: mov z3.s, #0x80000000
691+
; SVE-NEXT: mov z4.s, #0x80000000
728692
; SVE-NEXT: ptrue p1.s
729-
; SVE-NEXT: lsl z3.s, z3.s, #16
730-
; SVE-NEXT: lsl z4.s, z4.s, #16
731-
; SVE-NEXT: lsl z2.s, z2.s, #16
732-
; SVE-NEXT: lsl z1.s, z1.s, #16
733-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
734-
; SVE-NEXT: uunpklo z4.s, z0.h
735-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
736-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
693+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
694+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
695+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
737696
; SVE-NEXT: uunpkhi z3.s, z0.h
697+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
698+
; SVE-NEXT: uunpklo z4.s, z0.h
699+
; SVE-NEXT: lsl z3.s, z3.s, #16
700+
; SVE-NEXT: uunpkhi z2.s, z1.h
701+
; SVE-NEXT: uunpklo z1.s, z1.h
738702
; SVE-NEXT: lsl z4.s, z4.s, #16
739-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
740703
; SVE-NEXT: lsl z2.s, z2.s, #16
741-
; SVE-NEXT: lsl z3.s, z3.s, #16
742704
; SVE-NEXT: lsl z1.s, z1.s, #16
743705
; SVE-NEXT: fsub z2.s, z3.s, z2.s
744706
; SVE-NEXT: fsub z1.s, z4.s, z1.s

0 commit comments

Comments
 (0)