Skip to content

Commit cb0f859

Browse files
committed
Update bf16-combines checks
1 parent 3a5d7b3 commit cb0f859

File tree

1 file changed: +90 -120 lines changed

1 file changed

+90
-120
lines changed

llvm/test/CodeGen/AArch64/sve-bf16-combines.ll

Lines changed: 90 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -414,28 +414,22 @@ define <vscale x 8 x bfloat> @fsub_sel_negzero_nxv8bf16(<vscale x 8 x bfloat> %a
414414
define <vscale x 8 x bfloat> @fadd_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
415415
; SVE-LABEL: fadd_sel_fmul_nxv8bf16:
416416
; SVE: // %bb.0:
417-
; SVE-NEXT: uunpkhi z3.s, z2.h
418-
; SVE-NEXT: uunpkhi z4.s, z1.h
419-
; SVE-NEXT: uunpklo z2.s, z2.h
420-
; SVE-NEXT: uunpklo z1.s, z1.h
417+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
421418
; SVE-NEXT: ptrue p1.s
422-
; SVE-NEXT: lsl z3.s, z3.s, #16
423-
; SVE-NEXT: lsl z4.s, z4.s, #16
424-
; SVE-NEXT: lsl z2.s, z2.s, #16
425-
; SVE-NEXT: lsl z1.s, z1.s, #16
426-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
427-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
428-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
429-
; SVE-NEXT: movi v3.2d, #0000000000000000
430-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
431-
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
432-
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
419+
; SVE-NEXT: mov z3.s, w8
420+
; SVE-NEXT: mov z4.d, z3.d
421+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
422+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
423+
; SVE-NEXT: movi v2.2d, #0000000000000000
424+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
433425
; SVE-NEXT: uunpkhi z3.s, z0.h
434426
; SVE-NEXT: uunpklo z0.s, z0.h
435-
; SVE-NEXT: uunpkhi z2.s, z1.h
436-
; SVE-NEXT: uunpklo z1.s, z1.h
427+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
437428
; SVE-NEXT: lsl z3.s, z3.s, #16
438429
; SVE-NEXT: lsl z0.s, z0.s, #16
430+
; SVE-NEXT: sel z1.h, p0, z1.h, z2.h
431+
; SVE-NEXT: uunpkhi z2.s, z1.h
432+
; SVE-NEXT: uunpklo z1.s, z1.h
439433
; SVE-NEXT: lsl z2.s, z2.s, #16
440434
; SVE-NEXT: lsl z1.s, z1.s, #16
441435
; SVE-NEXT: fadd z2.s, z3.s, z2.s
@@ -461,24 +455,21 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <
461455
define <vscale x 8 x bfloat> @fsub_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
462456
; SVE-LABEL: fsub_sel_fmul_nxv8bf16:
463457
; SVE: // %bb.0:
464-
; SVE-NEXT: uunpkhi z3.s, z2.h
465-
; SVE-NEXT: uunpkhi z4.s, z1.h
466-
; SVE-NEXT: uunpklo z2.s, z2.h
467-
; SVE-NEXT: uunpklo z1.s, z1.h
458+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
468459
; SVE-NEXT: ptrue p1.s
469-
; SVE-NEXT: lsl z3.s, z3.s, #16
470-
; SVE-NEXT: lsl z4.s, z4.s, #16
471-
; SVE-NEXT: lsl z2.s, z2.s, #16
472-
; SVE-NEXT: lsl z1.s, z1.s, #16
473-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
474-
; SVE-NEXT: uunpklo z4.s, z0.h
475-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
476-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
460+
; SVE-NEXT: mov z3.s, w8
461+
; SVE-NEXT: mov z4.d, z3.d
462+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
463+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
464+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
477465
; SVE-NEXT: uunpkhi z3.s, z0.h
466+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
467+
; SVE-NEXT: uunpklo z4.s, z0.h
468+
; SVE-NEXT: lsl z3.s, z3.s, #16
469+
; SVE-NEXT: uunpkhi z2.s, z1.h
470+
; SVE-NEXT: uunpklo z1.s, z1.h
478471
; SVE-NEXT: lsl z4.s, z4.s, #16
479-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
480472
; SVE-NEXT: lsl z2.s, z2.s, #16
481-
; SVE-NEXT: lsl z3.s, z3.s, #16
482473
; SVE-NEXT: lsl z1.s, z1.s, #16
483474
; SVE-NEXT: fsub z2.s, z3.s, z2.s
484475
; SVE-NEXT: fsub z1.s, z4.s, z1.s
@@ -503,24 +494,21 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <
503494
define <vscale x 8 x bfloat> @fadd_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
504495
; SVE-LABEL: fadd_sel_fmul_nsz_nxv8bf16:
505496
; SVE: // %bb.0:
506-
; SVE-NEXT: uunpkhi z3.s, z2.h
507-
; SVE-NEXT: uunpkhi z4.s, z1.h
508-
; SVE-NEXT: uunpklo z2.s, z2.h
509-
; SVE-NEXT: uunpklo z1.s, z1.h
497+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
510498
; SVE-NEXT: ptrue p1.s
511-
; SVE-NEXT: lsl z3.s, z3.s, #16
512-
; SVE-NEXT: lsl z4.s, z4.s, #16
513-
; SVE-NEXT: lsl z2.s, z2.s, #16
514-
; SVE-NEXT: lsl z1.s, z1.s, #16
515-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
516-
; SVE-NEXT: uunpklo z4.s, z0.h
517-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
518-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
499+
; SVE-NEXT: mov z3.s, w8
500+
; SVE-NEXT: mov z4.d, z3.d
501+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
502+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
503+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
519504
; SVE-NEXT: uunpkhi z3.s, z0.h
505+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
506+
; SVE-NEXT: uunpklo z4.s, z0.h
507+
; SVE-NEXT: lsl z3.s, z3.s, #16
508+
; SVE-NEXT: uunpkhi z2.s, z1.h
509+
; SVE-NEXT: uunpklo z1.s, z1.h
520510
; SVE-NEXT: lsl z4.s, z4.s, #16
521-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
522511
; SVE-NEXT: lsl z2.s, z2.s, #16
523-
; SVE-NEXT: lsl z3.s, z3.s, #16
524512
; SVE-NEXT: lsl z1.s, z1.s, #16
525513
; SVE-NEXT: fadd z2.s, z3.s, z2.s
526514
; SVE-NEXT: fadd z1.s, z4.s, z1.s
@@ -545,24 +533,21 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
545533
define <vscale x 8 x bfloat> @fsub_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
546534
; SVE-LABEL: fsub_sel_fmul_nsz_nxv8bf16:
547535
; SVE: // %bb.0:
548-
; SVE-NEXT: uunpkhi z3.s, z2.h
549-
; SVE-NEXT: uunpkhi z4.s, z1.h
550-
; SVE-NEXT: uunpklo z2.s, z2.h
551-
; SVE-NEXT: uunpklo z1.s, z1.h
536+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
552537
; SVE-NEXT: ptrue p1.s
553-
; SVE-NEXT: lsl z3.s, z3.s, #16
554-
; SVE-NEXT: lsl z4.s, z4.s, #16
555-
; SVE-NEXT: lsl z2.s, z2.s, #16
556-
; SVE-NEXT: lsl z1.s, z1.s, #16
557-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
558-
; SVE-NEXT: uunpklo z4.s, z0.h
559-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
560-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
538+
; SVE-NEXT: mov z3.s, w8
539+
; SVE-NEXT: mov z4.d, z3.d
540+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
541+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
542+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
561543
; SVE-NEXT: uunpkhi z3.s, z0.h
544+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
545+
; SVE-NEXT: uunpklo z4.s, z0.h
546+
; SVE-NEXT: lsl z3.s, z3.s, #16
547+
; SVE-NEXT: uunpkhi z2.s, z1.h
548+
; SVE-NEXT: uunpklo z1.s, z1.h
562549
; SVE-NEXT: lsl z4.s, z4.s, #16
563-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
564550
; SVE-NEXT: lsl z2.s, z2.s, #16
565-
; SVE-NEXT: lsl z3.s, z3.s, #16
566551
; SVE-NEXT: lsl z1.s, z1.s, #16
567552
; SVE-NEXT: fsub z2.s, z3.s, z2.s
568553
; SVE-NEXT: fsub z1.s, z4.s, z1.s
@@ -587,24 +572,21 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
587572
define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
588573
; SVE-LABEL: fadd_sel_fmul_negzero_nxv8bf16:
589574
; SVE: // %bb.0:
590-
; SVE-NEXT: uunpkhi z3.s, z2.h
591-
; SVE-NEXT: uunpkhi z4.s, z1.h
592-
; SVE-NEXT: uunpklo z2.s, z2.h
593-
; SVE-NEXT: uunpklo z1.s, z1.h
575+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
594576
; SVE-NEXT: ptrue p1.s
595-
; SVE-NEXT: lsl z3.s, z3.s, #16
596-
; SVE-NEXT: lsl z4.s, z4.s, #16
597-
; SVE-NEXT: lsl z2.s, z2.s, #16
598-
; SVE-NEXT: lsl z1.s, z1.s, #16
599-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
600-
; SVE-NEXT: uunpklo z4.s, z0.h
601-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
602-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
577+
; SVE-NEXT: mov z3.s, w8
578+
; SVE-NEXT: mov z4.d, z3.d
579+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
580+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
581+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
603582
; SVE-NEXT: uunpkhi z3.s, z0.h
583+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
584+
; SVE-NEXT: uunpklo z4.s, z0.h
585+
; SVE-NEXT: lsl z3.s, z3.s, #16
586+
; SVE-NEXT: uunpkhi z2.s, z1.h
587+
; SVE-NEXT: uunpklo z1.s, z1.h
604588
; SVE-NEXT: lsl z4.s, z4.s, #16
605-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
606589
; SVE-NEXT: lsl z2.s, z2.s, #16
607-
; SVE-NEXT: lsl z3.s, z3.s, #16
608590
; SVE-NEXT: lsl z1.s, z1.s, #16
609591
; SVE-NEXT: fadd z2.s, z3.s, z2.s
610592
; SVE-NEXT: fadd z1.s, z4.s, z1.s
@@ -630,30 +612,24 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
630612
define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
631613
; SVE-LABEL: fsub_sel_fmul_negzero_nxv8bf16:
632614
; SVE: // %bb.0:
633-
; SVE-NEXT: uunpkhi z3.s, z2.h
634-
; SVE-NEXT: uunpkhi z4.s, z1.h
635-
; SVE-NEXT: mov w8, #32768 // =0x8000
636-
; SVE-NEXT: uunpklo z2.s, z2.h
637-
; SVE-NEXT: uunpklo z1.s, z1.h
615+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
638616
; SVE-NEXT: ptrue p1.s
639-
; SVE-NEXT: lsl z3.s, z3.s, #16
640-
; SVE-NEXT: lsl z4.s, z4.s, #16
641-
; SVE-NEXT: lsl z2.s, z2.s, #16
642-
; SVE-NEXT: lsl z1.s, z1.s, #16
643-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
644-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
645-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
646-
; SVE-NEXT: fmov h3, w8
647-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
648-
; SVE-NEXT: mov z3.h, h3
649-
; SVE-NEXT: uzp1 z1.h, z1.h, z2.h
650-
; SVE-NEXT: sel z1.h, p0, z1.h, z3.h
617+
; SVE-NEXT: mov z3.s, w8
618+
; SVE-NEXT: mov w8, #32768 // =0x8000
619+
; SVE-NEXT: mov z4.d, z3.d
620+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
621+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
622+
; SVE-NEXT: fmov h2, w8
623+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
651624
; SVE-NEXT: uunpkhi z3.s, z0.h
652625
; SVE-NEXT: uunpklo z0.s, z0.h
653-
; SVE-NEXT: uunpkhi z2.s, z1.h
654-
; SVE-NEXT: uunpklo z1.s, z1.h
626+
; SVE-NEXT: mov z2.h, h2
627+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
655628
; SVE-NEXT: lsl z3.s, z3.s, #16
656629
; SVE-NEXT: lsl z0.s, z0.s, #16
630+
; SVE-NEXT: sel z1.h, p0, z1.h, z2.h
631+
; SVE-NEXT: uunpkhi z2.s, z1.h
632+
; SVE-NEXT: uunpklo z1.s, z1.h
657633
; SVE-NEXT: lsl z2.s, z2.s, #16
658634
; SVE-NEXT: lsl z1.s, z1.s, #16
659635
; SVE-NEXT: fsub z2.s, z3.s, z2.s
@@ -682,24 +658,21 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
682658
define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
683659
; SVE-LABEL: fadd_sel_fmul_negzero_nsz_nxv8bf16:
684660
; SVE: // %bb.0:
685-
; SVE-NEXT: uunpkhi z3.s, z2.h
686-
; SVE-NEXT: uunpkhi z4.s, z1.h
687-
; SVE-NEXT: uunpklo z2.s, z2.h
688-
; SVE-NEXT: uunpklo z1.s, z1.h
661+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
689662
; SVE-NEXT: ptrue p1.s
690-
; SVE-NEXT: lsl z3.s, z3.s, #16
691-
; SVE-NEXT: lsl z4.s, z4.s, #16
692-
; SVE-NEXT: lsl z2.s, z2.s, #16
693-
; SVE-NEXT: lsl z1.s, z1.s, #16
694-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
695-
; SVE-NEXT: uunpklo z4.s, z0.h
696-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
697-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
663+
; SVE-NEXT: mov z3.s, w8
664+
; SVE-NEXT: mov z4.d, z3.d
665+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
666+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
667+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
698668
; SVE-NEXT: uunpkhi z3.s, z0.h
669+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
670+
; SVE-NEXT: uunpklo z4.s, z0.h
671+
; SVE-NEXT: lsl z3.s, z3.s, #16
672+
; SVE-NEXT: uunpkhi z2.s, z1.h
673+
; SVE-NEXT: uunpklo z1.s, z1.h
699674
; SVE-NEXT: lsl z4.s, z4.s, #16
700-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
701675
; SVE-NEXT: lsl z2.s, z2.s, #16
702-
; SVE-NEXT: lsl z3.s, z3.s, #16
703676
; SVE-NEXT: lsl z1.s, z1.s, #16
704677
; SVE-NEXT: fadd z2.s, z3.s, z2.s
705678
; SVE-NEXT: fadd z1.s, z4.s, z1.s
@@ -725,24 +698,21 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x b
725698
define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x i1> %mask) {
726699
; SVE-LABEL: fsub_sel_fmul_negzero_nsz_nxv8bf16:
727700
; SVE: // %bb.0:
728-
; SVE-NEXT: uunpkhi z3.s, z2.h
729-
; SVE-NEXT: uunpkhi z4.s, z1.h
730-
; SVE-NEXT: uunpklo z2.s, z2.h
731-
; SVE-NEXT: uunpklo z1.s, z1.h
701+
; SVE-NEXT: mov w8, #-2147483648 // =0x80000000
732702
; SVE-NEXT: ptrue p1.s
733-
; SVE-NEXT: lsl z3.s, z3.s, #16
734-
; SVE-NEXT: lsl z4.s, z4.s, #16
735-
; SVE-NEXT: lsl z2.s, z2.s, #16
736-
; SVE-NEXT: lsl z1.s, z1.s, #16
737-
; SVE-NEXT: fmul z3.s, z4.s, z3.s
738-
; SVE-NEXT: uunpklo z4.s, z0.h
739-
; SVE-NEXT: fmul z1.s, z1.s, z2.s
740-
; SVE-NEXT: bfcvt z2.h, p1/m, z3.s
703+
; SVE-NEXT: mov z3.s, w8
704+
; SVE-NEXT: mov z4.d, z3.d
705+
; SVE-NEXT: bfmlalb z3.s, z1.h, z2.h
706+
; SVE-NEXT: bfmlalt z4.s, z1.h, z2.h
707+
; SVE-NEXT: bfcvt z1.h, p1/m, z3.s
741708
; SVE-NEXT: uunpkhi z3.s, z0.h
709+
; SVE-NEXT: bfcvtnt z1.h, p1/m, z4.s
710+
; SVE-NEXT: uunpklo z4.s, z0.h
711+
; SVE-NEXT: lsl z3.s, z3.s, #16
712+
; SVE-NEXT: uunpkhi z2.s, z1.h
713+
; SVE-NEXT: uunpklo z1.s, z1.h
742714
; SVE-NEXT: lsl z4.s, z4.s, #16
743-
; SVE-NEXT: bfcvt z1.h, p1/m, z1.s
744715
; SVE-NEXT: lsl z2.s, z2.s, #16
745-
; SVE-NEXT: lsl z3.s, z3.s, #16
746716
; SVE-NEXT: lsl z1.s, z1.s, #16
747717
; SVE-NEXT: fsub z2.s, z3.s, z2.s
748718
; SVE-NEXT: fsub z1.s, z4.s, z1.s

0 commit comments

Comments (0)