Skip to content

Commit 11e1bfe

Make zeroing FCVT{XNT,NT} and BFCVTNT destructive

1 parent: b343f3f; commit: 11e1bfe

File tree

4 files changed: 48 additions and 91 deletions

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,8 +2455,9 @@ let Predicates = [HasBF16, HasSVEorSME] in {
24552455
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
24562456
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
24572457
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
2458-
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2459-
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
2458+
2459+
defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2460+
defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
24602461
} // End HasBF16, HasSVEorSME
24612462

24622463
let Predicates = [HasSVEorSME] in {
@@ -4268,17 +4269,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
42684269
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
42694270

42704271
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
4271-
defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
4272-
def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
4272+
defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
4273+
def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
42734274
// Placing even
4274-
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
4275+
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
42754276

42764277
// SVE2p2 floating-point convert precision up, zeroing predicate
4277-
defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
4278+
defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
42784279

42794280
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
4280-
def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
4281-
// Placing corresponding
4281+
def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
42824282
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
42834283

42844284
// Floating-point convert to integer, zeroing predicate

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 30 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,85 +2787,68 @@ multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
27872787
// SVE2 Floating Point Convert Group
27882788
//===----------------------------------------------------------------------===//
27892789

2790-
class sve2_fp_convert_precision<bits<4> opc, string asm,
2791-
ZPRRegOp zprty1, ZPRRegOp zprty2>
2792-
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
2793-
asm, "\t$Zd, $Pg/m, $Zn",
2790+
class sve2_fp_convert_precision<bits<4> opc, bit merging, string asm,
2791+
ZPRRegOp zprty1, ZPRRegOp zprty2, bit destructive=merging>
2792+
: I<(outs zprty1:$Zd),
2793+
!if(destructive, (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
2794+
(ins PPR3bAny:$Pg, zprty2:$Zn)),
2795+
asm, "\t$Zd, " # !if(merging, "$Pg/m", "$Pg/z") # ", $Zn",
27942796
"",
27952797
[]>, Sched<[]> {
27962798
bits<5> Zd;
27972799
bits<5> Zn;
27982800
bits<3> Pg;
27992801
let Inst{31-24} = 0b01100100;
28002802
let Inst{23-22} = opc{3-2};
2801-
let Inst{21-18} = 0b0010;
2803+
let Inst{21-20} = 0b00;
2804+
let Inst{19} = merging;
2805+
let Inst{18} = 0b0;
28022806
let Inst{17-16} = opc{1-0};
28032807
let Inst{15-13} = 0b101;
28042808
let Inst{12-10} = Pg;
28052809
let Inst{9-5} = Zn;
28062810
let Inst{4-0} = Zd;
28072811

2808-
let Constraints = "$Zd = $_Zd";
2812+
let Constraints = !if(destructive, "$Zd = $_Zd", "");
28092813
let hasSideEffects = 0;
28102814
let mayRaiseFPException = 1;
28112815
}
28122816

28132817
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
2814-
def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
2815-
def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
2818+
def _StoH : sve2_fp_convert_precision<0b1000, 0b1, asm, ZPR16, ZPR32>;
2819+
def _DtoS : sve2_fp_convert_precision<0b1110, 0b1, asm, ZPR32, ZPR64>;
28162820

28172821
def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
28182822
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
28192823
}
28202824

28212825
multiclass sve2_fp_convert_up_long<string asm, string op> {
2822-
def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
2823-
def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
2826+
def _HtoS : sve2_fp_convert_precision<0b1001, 0b1, asm, ZPR32, ZPR16>;
2827+
def _StoD : sve2_fp_convert_precision<0b1111, 0b1, asm, ZPR64, ZPR32>;
28242828

28252829
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
28262830
def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
28272831
}
28282832

28292833
multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
2830-
def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
2834+
def _DtoS : sve2_fp_convert_precision<0b0010, 0b1, asm, ZPR32, ZPR64>;
28312835

28322836
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
28332837
}
28342838

2835-
class sve_fp_fcvt2z<bits<4> opc, string asm, ZPRRegOp zprty1,
2836-
ZPRRegOp zprty2>
2837-
: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
2838-
asm, "\t$Zd, $Pg/z, $Zn",
2839-
"",
2840-
[]>, Sched<[]> {
2841-
bits<5> Zd;
2842-
bits<5> Zn;
2843-
bits<3> Pg;
2844-
let Inst{31-24} = 0b01100100;
2845-
let Inst{23-22} = opc{3-2};
2846-
let Inst{21-18} = 0b0000;
2847-
let Inst{17-16} = opc{1-0};
2848-
let Inst{15-13} = 0b101;
2849-
let Inst{12-10} = Pg;
2850-
let Inst{9-5} = Zn;
2851-
let Inst{4-0} = Zd;
2852-
let hasSideEffects = 0;
2853-
let mayRaiseFPException = 1;
2854-
}
2855-
2856-
multiclass sve_fp_fcvtntz<string asm> {
2857-
def _StoH : sve_fp_fcvt2z<0b1000, asm, ZPR16, ZPR32>;
2858-
def _DtoS : sve_fp_fcvt2z<0b1110, asm, ZPR32, ZPR64>;
2859-
}
2860-
2861-
multiclass sve_fp_fcvtltz<string asm, string op> {
2862-
def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
2863-
def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
2839+
multiclass sve2_fp_convert_up_long_z<string asm, string op> {
2840+
def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
2841+
def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
28642842

28652843
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
28662844
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
28672845
}
28682846

2847+
multiclass sve2_fp_convert_down_narrow_z<string asm> {
2848+
def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
2849+
def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
2850+
}
2851+
28692852
//===----------------------------------------------------------------------===//
28702853
// SVE2 Floating Point Pairwise Group
28712854
//===----------------------------------------------------------------------===//
@@ -9296,33 +9279,18 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
92969279
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
92979280
}
92989281

9299-
class sve_bfloat_convert<bit N, string asm>
9300-
: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
9301-
asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
9302-
bits<5> Zd;
9303-
bits<3> Pg;
9304-
bits<5> Zn;
9305-
let Inst{31-25} = 0b0110010;
9306-
let Inst{24} = N;
9307-
let Inst{23-13} = 0b10001010101;
9308-
let Inst{12-10} = Pg;
9309-
let Inst{9-5} = Zn;
9310-
let Inst{4-0} = Zd;
9282+
multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperator ir_op> {
9283+
def NAME : sve_fp_2op_p_zd<0b1001010, asm, ZPR32, ZPR16, ElementSizeS>;
93119284

9312-
let Constraints = "$Zd = $_Zd";
9313-
let DestructiveInstType = DestructiveOther;
9314-
let ElementSize = ElementSizeS;
9315-
let hasSideEffects = 0;
9316-
let mayRaiseFPException = 1;
9285+
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9286+
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9287+
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
93179288
}
93189289

9319-
multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
9320-
SDPatternOperator ir_op = null_frag> {
9321-
def NAME : sve_bfloat_convert<N, asm>;
9290+
multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
9291+
def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
93229292

93239293
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9324-
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9325-
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
93269294
}
93279295

93289296
//===----------------------------------------------------------------------===//

llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,17 @@ bfcvtnt z0.h, p8/m, z1.s
2020
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
2121
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
2222

23+
// --------------------------------------------------------------------------//
24+
// Negative tests for instructions that are incompatible with movprfx
25+
2326
movprfx z0.h, p0/m, z7.h
2427
bfcvtnt z0.h, p0/m, z1.s
25-
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
28+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
2629
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
2730
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
31+
32+
movprfx z0, z7
33+
bfcvtnt z0.h, p7/m, z1.s
34+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
35+
// CHECK-NEXT: bfcvtnt z0.h, p7/m, z1.s
36+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

llvm/test/MC/AArch64/SVE/bfcvtnt.s

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,3 @@ bfcvtnt z0.H, p0/m, z1.S
99
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
1010
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
1111
// CHECK-ERROR: instruction requires: bf16 sve or sme
12-
13-
movprfx z0.S, p0/m, z2.S
14-
// CHECK-INST: movprfx z0.s, p0/m, z2.s
15-
// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
16-
// CHECK-ERROR: instruction requires: sve or sme
17-
18-
bfcvtnt z0.H, p0/m, z1.S
19-
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
20-
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
21-
// CHECK-ERROR: instruction requires: bf16 sve or sme
22-
23-
movprfx z0, z2
24-
// CHECK-INST: movprfx z0, z2
25-
// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
26-
// CHECK-ERROR: instruction requires: sve or sme
27-
28-
bfcvtnt z0.H, p0/m, z1.S
29-
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
30-
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
31-
// CHECK-ERROR: instruction requires: bf16 sve or sme

0 commit comments