Skip to content

Commit 8534e98

Browse files
committed
[AArch64][GlobalISel] Add codegen for simd fpcvt instructions
1 parent 5899bca commit 8534e98

14 files changed

+2735
-91
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5299,28 +5299,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
52995299
}
53005300
}
53015301

5302-
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
5302+
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
5303+
SDPatternOperator OpN> {
53035304
// double-precision to 32-bit SIMD/FPR
53045305
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
5305-
[]> {
5306+
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
53065307
let Inst{31} = 0; // 32-bit FPR flag
53075308
}
53085309

53095310
// half-precision to 32-bit SIMD/FPR
53105311
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
5311-
[]> {
5312+
[(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
53125313
let Inst{31} = 0; // 32-bit FPR flag
53135314
}
53145315

53155316
// half-precision to 64-bit SIMD/FPR
53165317
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
5317-
[]> {
5318+
[(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
53185319
let Inst{31} = 1; // 64-bit FPR flag
53195320
}
53205321

53215322
// single-precision to 64-bit SIMD/FPR
53225323
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
5323-
[]> {
5324+
[(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
53245325
let Inst{31} = 1; // 64-bit FPR flag
53255326
}
53265327
}
@@ -7949,6 +7950,21 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
79497950
}
79507951
}
79517952

7953+
let mayRaiseFPException = 1, Uses = [FPCR] in
7954+
multiclass SIMDFPTwoScalarFCVT<bit U, bit S, bits<5> opc, string asm,
7955+
SDPatternOperator OpN> {
7956+
let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in {
7957+
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
7958+
[(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
7959+
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
7960+
[(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
7961+
}
7962+
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
7963+
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
7964+
[(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
7965+
}
7966+
}
7967+
79527968
let mayRaiseFPException = 1, Uses = [FPCR] in
79537969
multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
79547970
SDPatternOperator OpNode> {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 230 additions & 21 deletions
Large diffs are not rendered by default.

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 44 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -568,9 +568,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
568568
case Intrinsic::aarch64_neon_fcvtnu:
569569
case Intrinsic::aarch64_neon_fcvtps:
570570
case Intrinsic::aarch64_neon_fcvtpu:
571-
// Force FPR register bank for half types, as those types otherwise
572-
// don't get legalized correctly resulting in fp16 <-> gpr32 COPY's.
573-
return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16();
571+
return true;
574572
default:
575573
break;
576574
}
@@ -864,10 +862,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
864862
case TargetOpcode::G_FPTOUI:
865863
case TargetOpcode::G_INTRINSIC_LRINT:
866864
case TargetOpcode::G_INTRINSIC_LLRINT:
865+
case TargetOpcode::G_LROUND:
866+
case TargetOpcode::G_LLROUND: {
867867
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
868868
break;
869-
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
869+
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
870+
TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
871+
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
872+
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
873+
[&](const MachineInstr &UseMI) {
874+
return onlyUsesFP(UseMI, MRI, TRI) ||
875+
prefersFPUse(UseMI, MRI, TRI);
876+
}))
877+
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
878+
else
879+
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
870880
break;
881+
}
882+
871883
case TargetOpcode::G_FCMP: {
872884
// If the result is a vector, it must use a FPR.
873885
AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 =
@@ -1143,6 +1155,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
11431155
case TargetOpcode::G_INTRINSIC:
11441156
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
11451157
switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
1158+
case Intrinsic::aarch64_neon_fcvtas:
1159+
case Intrinsic::aarch64_neon_fcvtau:
1160+
case Intrinsic::aarch64_neon_fcvtzs:
1161+
case Intrinsic::aarch64_neon_fcvtzu:
1162+
case Intrinsic::aarch64_neon_fcvtms:
1163+
case Intrinsic::aarch64_neon_fcvtmu:
1164+
case Intrinsic::aarch64_neon_fcvtns:
1165+
case Intrinsic::aarch64_neon_fcvtnu:
1166+
case Intrinsic::aarch64_neon_fcvtps:
1167+
case Intrinsic::aarch64_neon_fcvtpu: {
1168+
OpRegBankIdx[2] = PMI_FirstFPR;
1169+
if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
1170+
OpRegBankIdx[0] = PMI_FirstFPR;
1171+
break;
1172+
}
1173+
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
1174+
TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI);
1175+
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
1176+
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
1177+
[&](const MachineInstr &UseMI) {
1178+
return onlyUsesFP(UseMI, MRI, TRI) ||
1179+
prefersFPUse(UseMI, MRI, TRI);
1180+
}))
1181+
OpRegBankIdx[0] = PMI_FirstFPR;
1182+
else
1183+
OpRegBankIdx[0] = PMI_FirstGPR;
1184+
break;
1185+
}
11461186
case Intrinsic::aarch64_neon_vcvtfxs2fp:
11471187
case Intrinsic::aarch64_neon_vcvtfxu2fp:
11481188
case Intrinsic::aarch64_neon_vcvtfp2fxs:
@@ -1179,12 +1219,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
11791219
}
11801220
break;
11811221
}
1182-
case TargetOpcode::G_LROUND:
1183-
case TargetOpcode::G_LLROUND: {
1184-
// Source is always floating point and destination is always integer.
1185-
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
1186-
break;
1187-
}
11881222
}
11891223

11901224
// Finally construct the computed mapping.

llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ body: |
9696
; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
9797
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
9898
; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]]
99-
; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
99+
; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32)
100100
%0:_(s32) = COPY $w0
101101
%2:_(s32) = COPY $w1
102102
%3:_(s32) = COPY $w2

llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ body: |
1414
; CHECK: liveins: $d0
1515
; CHECK-NEXT: {{ $}}
1616
; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
17-
; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND %fpr(s64)
17+
; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND %fpr(s64)
1818
; CHECK-NEXT: $d0 = COPY %llround(s64)
1919
; CHECK-NEXT: RET_ReallyLR implicit $s0
2020
%fpr:_(s64) = COPY $d0
@@ -35,7 +35,7 @@ body: |
3535
; CHECK-NEXT: {{ $}}
3636
; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
3737
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
38-
; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND [[COPY]](s64)
38+
; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND [[COPY]](s64)
3939
; CHECK-NEXT: $d0 = COPY %llround(s64)
4040
; CHECK-NEXT: RET_ReallyLR implicit $s0
4141
%gpr:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ body: |
1414
; CHECK: liveins: $d0
1515
; CHECK-NEXT: {{ $}}
1616
; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0
17-
; CHECK-NEXT: %lround:gpr(s64) = G_LROUND %fpr(s64)
17+
; CHECK-NEXT: %lround:fpr(s64) = G_LROUND %fpr(s64)
1818
; CHECK-NEXT: $d0 = COPY %lround(s64)
1919
; CHECK-NEXT: RET_ReallyLR implicit $s0
2020
%fpr:_(s64) = COPY $d0
@@ -35,7 +35,7 @@ body: |
3535
; CHECK-NEXT: {{ $}}
3636
; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0
3737
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64)
38-
; CHECK-NEXT: %lround:gpr(s64) = G_LROUND [[COPY]](s64)
38+
; CHECK-NEXT: %lround:fpr(s64) = G_LROUND [[COPY]](s64)
3939
; CHECK-NEXT: $d0 = COPY %lround(s64)
4040
; CHECK-NEXT: RET_ReallyLR implicit $s0
4141
%gpr:_(s64) = COPY $x0

0 commit comments

Comments
 (0)