Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -5299,28 +5299,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}

multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN = null_frag> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
[]> {
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 32-bit SIMD/FPR
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
[]> {
[(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}

// half-precision to 64-bit SIMD/FPR
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
[]> {
[(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}

// single-precision to 64-bit SIMD/FPR
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
[]> {
[(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
}
Expand Down Expand Up @@ -7949,6 +7950,21 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
}
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalarFCVT<bit U, bit S, bits<5> opc, string asm,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can fold this into SIMDFPTwoScalar providing they pass null_frag.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would disable all instructions that use that class in FastISel, but if that is fine, I can do it.

SDPatternOperator OpN = null_frag> {
let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
[(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
}
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
[(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
}
}

let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
Expand Down
87 changes: 69 additions & 18 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -5232,19 +5232,54 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

defm FCVTAS : SIMDFPTwoScalarFCVT< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In relation to this from #156892

I wanted to keep the patterns together and, unfortunately, in TableGen you need to define records above their usage, so I needed to move these instructions up.

Could we keep the instructions together with the correct kinds, and move the patterns later? I think it's OK to move the patterns later if you wanted to keep them together.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do that, but it will require moving all the patterns down, which seems more intrusive than moving a couple of instructions up. Why is it so important to keep the instructions in their original location? The description of the section they are moved to matches their behaviour as well.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just that I was considering the v1i32 instructions "scalar neon instructions" that are really operating on the first lane of a neon register and require hasNeon, I believe. The SDr-style instructions are "FP" instructions that tend to operate between FPR and GPR. I would still consider the v1i32 instructions "Advanced SIMD two scalar instructions" and a little different from normal FP instructions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, moved them back and moved the patterns down.

defm FCVTAU : SIMDFPTwoScalarFCVT< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : SIMDFPTwoScalarFCVT< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDFPTwoScalarFCVT< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDFPTwoScalarFCVT< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDFPTwoScalarFCVT< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : SIMDFPTwoScalarFCVT< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDFPTwoScalarFCVT< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : SIMDFPTwoScalarFCVT< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalarFCVT< 1, 1, 0b11011, "fcvtzu">;

let Predicates = [HasNEON, HasFPRCVT] in{
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
}

// Selection patterns for an FP-to-integer conversion whose integer result is
// immediately bitconverted to an FP type of the same width (i32 -> f32,
// i64 -> f64). Each pattern selects a single instruction that takes the FP
// source register as its only operand.
//
//   OpN  - the conversion operator to match on the FP source value.
//   INST - instruction name prefix; one of the suffixes SDr, SHr, DHr, DSr,
//          v1i32 or v1i64 is appended and the resulting record is looked up
//          with !cast<Instruction>.
//
// NOTE(review): the suffixed instruction records must already be defined when
// this multiclass is instantiated; presumably they come from the
// FPToIntegerSIMDScalar / SIMDFPTwoScalarFCVT multiclasses -- confirm.
multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
// f64 source, 32-bit result.
def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
// f16 source, 32-bit result.
def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
(!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
// f16 source, 64-bit result.
def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
(!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
// f32 source, 64-bit result.
def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
// Same-width cases: f32 source with 32-bit result, f64 source with 64-bit
// result. These use the v1i32 / v1i64 instruction records.
def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
(!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
(!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;

}
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;

// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
let Predicates = [HasFullFP16] in {
Expand Down Expand Up @@ -5321,6 +5356,32 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

// For global-isel we can use register classes to determine
// which FCVT instruction to use.
let Predicates = [HasFPRCVT] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
}
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;

let Predicates = [HasFPRCVT] in {
def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
(!cast<Instruction>(INST # SHr) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
(!cast<Instruction>(INST # DHr) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
(!cast<Instruction>(INST # DSr) $Rn)>;
def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
(!cast<Instruction>(INST # SDr) $Rn)>;
}
def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
(!cast<Instruction>(INST # v1i32) $Rn)>;
def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
(!cast<Instruction>(INST # v1i64) $Rn)>;

let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
Expand Down Expand Up @@ -6569,17 +6630,7 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
Expand Down
32 changes: 29 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,9 +572,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu:
// Force FPR register bank for half types, as those types otherwise
// don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
return true;
default:
break;
}
Expand Down Expand Up @@ -1147,6 +1145,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC:
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
case Intrinsic::aarch64_neon_fcvtas:
case Intrinsic::aarch64_neon_fcvtau:
case Intrinsic::aarch64_neon_fcvtzs:
case Intrinsic::aarch64_neon_fcvtzu:
case Intrinsic::aarch64_neon_fcvtms:
case Intrinsic::aarch64_neon_fcvtmu:
case Intrinsic::aarch64_neon_fcvtns:
case Intrinsic::aarch64_neon_fcvtnu:
case Intrinsic::aarch64_neon_fcvtps:
case Intrinsic::aarch64_neon_fcvtpu: {
OpRegBankIdx[2] = PMI_FirstFPR;
if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
OpRegBankIdx[0] = PMI_FirstFPR;
break;
}
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](const MachineInstr &UseMI) {
return onlyUsesFP(UseMI, MRI, TRI) ||
prefersFPUse(UseMI, MRI, TRI);
}))
OpRegBankIdx[0] = PMI_FirstFPR;
else
OpRegBankIdx[0] = PMI_FirstGPR;
break;
}
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
case Intrinsic::aarch64_neon_vcvtfp2fxs:
Expand Down
Loading
Loading