Skip to content

Commit 21c4c15

Browse files
[LLVM][CodeGen][SVE] Only use unpredicated bfloat instructions when all lanes are in use. (#168387)
While SVE support for exception-safe floating-point code generation is bare bones, we try to ensure inactive lanes remain inert. I mistakenly broke this rule when adding support for SVE-B16B16 by lowering some bfloat operations of unpacked vectors to unpredicated instructions.
1 parent 3da82af commit 21c4c15

File tree

4 files changed

+25
-37
lines changed

4 files changed

+25
-37
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,14 +1786,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
17861786

17871787
if (Subtarget->hasSVEB16B16() &&
17881788
Subtarget->isNonStreamingSVEorSME2Available()) {
1789-
setOperationAction(ISD::FADD, VT, Legal);
1789+
setOperationAction(ISD::FADD, VT, Custom);
17901790
setOperationAction(ISD::FMA, VT, Custom);
17911791
setOperationAction(ISD::FMAXIMUM, VT, Custom);
17921792
setOperationAction(ISD::FMAXNUM, VT, Custom);
17931793
setOperationAction(ISD::FMINIMUM, VT, Custom);
17941794
setOperationAction(ISD::FMINNUM, VT, Custom);
1795-
setOperationAction(ISD::FMUL, VT, Legal);
1796-
setOperationAction(ISD::FSUB, VT, Legal);
1795+
setOperationAction(ISD::FMUL, VT, Custom);
1796+
setOperationAction(ISD::FSUB, VT, Custom);
17971797
}
17981798
}
17991799

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,8 +2464,6 @@ multiclass sve_fp_3op_u_zd_bfloat<bits<3> opc, string asm, SDPatternOperator op>
24642464
def NAME : sve_fp_3op_u_zd<0b00, opc, asm, ZPR16>;
24652465

24662466
def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME)>;
2467-
def : SVE_2_Op_Pat<nxv4bf16, op, nxv4bf16, nxv4bf16, !cast<Instruction>(NAME)>;
2468-
def : SVE_2_Op_Pat<nxv2bf16, op, nxv2bf16, nxv2bf16, !cast<Instruction>(NAME)>;
24692467
}
24702468

24712469
multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {

llvm/test/CodeGen/AArch64/sve-bf16-arith.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ define <vscale x 2 x bfloat> @fadd_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
5656
;
5757
; B16B16-LABEL: fadd_nxv2bf16:
5858
; B16B16: // %bb.0:
59-
; B16B16-NEXT: bfadd z0.h, z0.h, z1.h
59+
; B16B16-NEXT: ptrue p0.d
60+
; B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
6061
; B16B16-NEXT: ret
6162
%res = fadd <vscale x 2 x bfloat> %a, %b
6263
ret <vscale x 2 x bfloat> %res
@@ -74,7 +75,8 @@ define <vscale x 4 x bfloat> @fadd_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
7475
;
7576
; B16B16-LABEL: fadd_nxv4bf16:
7677
; B16B16: // %bb.0:
77-
; B16B16-NEXT: bfadd z0.h, z0.h, z1.h
78+
; B16B16-NEXT: ptrue p0.s
79+
; B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
7880
; B16B16-NEXT: ret
7981
%res = fadd <vscale x 4 x bfloat> %a, %b
8082
ret <vscale x 4 x bfloat> %res
@@ -525,7 +527,8 @@ define <vscale x 2 x bfloat> @fmul_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
525527
;
526528
; B16B16-LABEL: fmul_nxv2bf16:
527529
; B16B16: // %bb.0:
528-
; B16B16-NEXT: bfmul z0.h, z0.h, z1.h
530+
; B16B16-NEXT: ptrue p0.d
531+
; B16B16-NEXT: bfmul z0.h, p0/m, z0.h, z1.h
529532
; B16B16-NEXT: ret
530533
%res = fmul <vscale x 2 x bfloat> %a, %b
531534
ret <vscale x 2 x bfloat> %res
@@ -543,7 +546,8 @@ define <vscale x 4 x bfloat> @fmul_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
543546
;
544547
; B16B16-LABEL: fmul_nxv4bf16:
545548
; B16B16: // %bb.0:
546-
; B16B16-NEXT: bfmul z0.h, z0.h, z1.h
549+
; B16B16-NEXT: ptrue p0.s
550+
; B16B16-NEXT: bfmul z0.h, p0/m, z0.h, z1.h
547551
; B16B16-NEXT: ret
548552
%res = fmul <vscale x 4 x bfloat> %a, %b
549553
ret <vscale x 4 x bfloat> %res
@@ -672,7 +676,8 @@ define <vscale x 2 x bfloat> @fsub_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x
672676
;
673677
; B16B16-LABEL: fsub_nxv2bf16:
674678
; B16B16: // %bb.0:
675-
; B16B16-NEXT: bfsub z0.h, z0.h, z1.h
679+
; B16B16-NEXT: ptrue p0.d
680+
; B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
676681
; B16B16-NEXT: ret
677682
%res = fsub <vscale x 2 x bfloat> %a, %b
678683
ret <vscale x 2 x bfloat> %res
@@ -690,7 +695,8 @@ define <vscale x 4 x bfloat> @fsub_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x
690695
;
691696
; B16B16-LABEL: fsub_nxv4bf16:
692697
; B16B16: // %bb.0:
693-
; B16B16-NEXT: bfsub z0.h, z0.h, z1.h
698+
; B16B16-NEXT: ptrue p0.s
699+
; B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
694700
; B16B16-NEXT: ret
695701
%res = fsub <vscale x 4 x bfloat> %a, %b
696702
ret <vscale x 4 x bfloat> %res

llvm/test/CodeGen/AArch64/sve-bf16-combines.ll

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,7 @@ define <vscale x 8 x bfloat> @fadd_sel_nxv8bf16(<vscale x 8 x bfloat> %a, <vscal
311311
;
312312
; SVE-B16B16-LABEL: fadd_sel_nxv8bf16:
313313
; SVE-B16B16: // %bb.0:
314-
; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
315-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
314+
; SVE-B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
316315
; SVE-B16B16-NEXT: ret
317316
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> zeroinitializer
318317
%fadd = fadd nsz <vscale x 8 x bfloat> %a, %sel
@@ -341,8 +340,7 @@ define <vscale x 8 x bfloat> @fsub_sel_nxv8bf16(<vscale x 8 x bfloat> %a, <vscal
341340
;
342341
; SVE-B16B16-LABEL: fsub_sel_nxv8bf16:
343342
; SVE-B16B16: // %bb.0:
344-
; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
345-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
343+
; SVE-B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
346344
; SVE-B16B16-NEXT: ret
347345
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> zeroinitializer
348346
%fsub = fsub <vscale x 8 x bfloat> %a, %sel
@@ -371,8 +369,7 @@ define <vscale x 8 x bfloat> @fadd_sel_negzero_nxv8bf16(<vscale x 8 x bfloat> %a
371369
;
372370
; SVE-B16B16-LABEL: fadd_sel_negzero_nxv8bf16:
373371
; SVE-B16B16: // %bb.0:
374-
; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
375-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
372+
; SVE-B16B16-NEXT: bfadd z0.h, p0/m, z0.h, z1.h
376373
; SVE-B16B16-NEXT: ret
377374
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
378375
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %nz
@@ -402,8 +399,7 @@ define <vscale x 8 x bfloat> @fsub_sel_negzero_nxv8bf16(<vscale x 8 x bfloat> %a
402399
;
403400
; SVE-B16B16-LABEL: fsub_sel_negzero_nxv8bf16:
404401
; SVE-B16B16: // %bb.0:
405-
; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
406-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
402+
; SVE-B16B16-NEXT: bfsub z0.h, p0/m, z0.h, z1.h
407403
; SVE-B16B16-NEXT: ret
408404
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
409405
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %nz
@@ -490,9 +486,7 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nxv8bf16(<vscale x 8 x bfloat> %a, <
490486
;
491487
; SVE-B16B16-LABEL: fsub_sel_fmul_nxv8bf16:
492488
; SVE-B16B16: // %bb.0:
493-
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
494-
; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
495-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
489+
; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h
496490
; SVE-B16B16-NEXT: ret
497491
%fmul = fmul <vscale x 8 x bfloat> %b, %c
498492
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %fmul, <vscale x 8 x bfloat> zeroinitializer
@@ -532,9 +526,7 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
532526
;
533527
; SVE-B16B16-LABEL: fadd_sel_fmul_nsz_nxv8bf16:
534528
; SVE-B16B16: // %bb.0:
535-
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
536-
; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
537-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
529+
; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h
538530
; SVE-B16B16-NEXT: ret
539531
%fmul = fmul <vscale x 8 x bfloat> %b, %c
540532
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %fmul, <vscale x 8 x bfloat> zeroinitializer
@@ -574,9 +566,7 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_nsz_nxv8bf16(<vscale x 8 x bfloat> %
574566
;
575567
; SVE-B16B16-LABEL: fsub_sel_fmul_nsz_nxv8bf16:
576568
; SVE-B16B16: // %bb.0:
577-
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
578-
; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
579-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
569+
; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h
580570
; SVE-B16B16-NEXT: ret
581571
%fmul = fmul <vscale x 8 x bfloat> %b, %c
582572
%sel = select <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %fmul, <vscale x 8 x bfloat> zeroinitializer
@@ -616,9 +606,7 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nxv8bf16(<vscale x 8 x bfloa
616606
;
617607
; SVE-B16B16-LABEL: fadd_sel_fmul_negzero_nxv8bf16:
618608
; SVE-B16B16: // %bb.0:
619-
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
620-
; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
621-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
609+
; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h
622610
; SVE-B16B16-NEXT: ret
623611
%fmul = fmul <vscale x 8 x bfloat> %b, %c
624612
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
@@ -707,9 +695,7 @@ define <vscale x 8 x bfloat> @fadd_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x b
707695
;
708696
; SVE-B16B16-LABEL: fadd_sel_fmul_negzero_nsz_nxv8bf16:
709697
; SVE-B16B16: // %bb.0:
710-
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
711-
; SVE-B16B16-NEXT: bfadd z1.h, z0.h, z1.h
712-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
698+
; SVE-B16B16-NEXT: bfmla z0.h, p0/m, z1.h, z2.h
713699
; SVE-B16B16-NEXT: ret
714700
%fmul = fmul <vscale x 8 x bfloat> %b, %c
715701
%nz = fneg <vscale x 8 x bfloat> zeroinitializer
@@ -750,9 +736,7 @@ define <vscale x 8 x bfloat> @fsub_sel_fmul_negzero_nsz_nxv8bf16(<vscale x 8 x b
750736
;
751737
; SVE-B16B16-LABEL: fsub_sel_fmul_negzero_nsz_nxv8bf16:
752738
; SVE-B16B16: // %bb.0:
753-
; SVE-B16B16-NEXT: bfmul z1.h, z1.h, z2.h
754-
; SVE-B16B16-NEXT: bfsub z1.h, z0.h, z1.h
755-
; SVE-B16B16-NEXT: mov z0.h, p0/m, z1.h
739+
; SVE-B16B16-NEXT: bfmls z0.h, p0/m, z1.h, z2.h
756740
; SVE-B16B16-NEXT: ret
757741
%fmul = fmul <vscale x 8 x bfloat> %b, %c
758742
%nz = fneg <vscale x 8 x bfloat> zeroinitializer

0 commit comments

Comments
 (0)