Skip to content

Commit d091259

Browse files
committed
[AArch64][GlobalISel] Be more precise in RegBankSelect for s/uitofp
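We can generate fpr->fpr instructions for G_SITOFP and G_UITOFP when the source and destination have the same size. Previously, RegBankSelect marked the source operand of these instructions as FPR, but instruction selection then generated the GPR form of the conversion and had to introduce a copy from the FPR to a GPR.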
1 parent f9c20ba commit d091259

File tree

5 files changed

+49
-212
lines changed

5 files changed

+49
-212
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,67 +1102,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
11021102
return true;
11031103
}
11041104

1105-
static unsigned selectIntToFPConvOpc(unsigned GenericOpc, LLT DstTy,
1106-
LLT SrcTy) {
1107-
if (!DstTy.isScalar() || !SrcTy.isScalar())
1108-
return GenericOpc;
1109-
1110-
const unsigned DstSize = DstTy.getSizeInBits();
1111-
const unsigned SrcSize = SrcTy.getSizeInBits();
1112-
1113-
switch (DstSize) {
1114-
case 32:
1115-
switch (SrcSize) {
1116-
case 32:
1117-
switch (GenericOpc) {
1118-
case TargetOpcode::G_SITOFP:
1119-
return AArch64::SCVTFUWSri;
1120-
case TargetOpcode::G_UITOFP:
1121-
return AArch64::UCVTFUWSri;
1122-
default:
1123-
return GenericOpc;
1124-
}
1125-
case 64:
1126-
switch (GenericOpc) {
1127-
case TargetOpcode::G_SITOFP:
1128-
return AArch64::SCVTFUXSri;
1129-
case TargetOpcode::G_UITOFP:
1130-
return AArch64::UCVTFUXSri;
1131-
default:
1132-
return GenericOpc;
1133-
}
1134-
default:
1135-
return GenericOpc;
1136-
}
1137-
case 64:
1138-
switch (SrcSize) {
1139-
case 32:
1140-
switch (GenericOpc) {
1141-
case TargetOpcode::G_SITOFP:
1142-
return AArch64::SCVTFUWDri;
1143-
case TargetOpcode::G_UITOFP:
1144-
return AArch64::UCVTFUWDri;
1145-
default:
1146-
return GenericOpc;
1147-
}
1148-
case 64:
1149-
switch (GenericOpc) {
1150-
case TargetOpcode::G_SITOFP:
1151-
return AArch64::SCVTFUXDri;
1152-
case TargetOpcode::G_UITOFP:
1153-
return AArch64::UCVTFUXDri;
1154-
default:
1155-
return GenericOpc;
1156-
}
1157-
default:
1158-
return GenericOpc;
1159-
}
1160-
default:
1161-
return GenericOpc;
1162-
};
1163-
return GenericOpc;
1164-
}
1165-
11661105
MachineInstr *
11671106
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
11681107
Register False, AArch64CC::CondCode CC,
@@ -3509,21 +3448,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
35093448
return true;
35103449
}
35113450

3512-
case TargetOpcode::G_SITOFP:
3513-
case TargetOpcode::G_UITOFP: {
3514-
const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3515-
SrcTy = MRI.getType(I.getOperand(1).getReg());
3516-
const unsigned NewOpc = selectIntToFPConvOpc(Opcode, DstTy, SrcTy);
3517-
if (NewOpc == Opcode)
3518-
return false;
3519-
3520-
I.setDesc(TII.get(NewOpc));
3521-
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3522-
I.setFlags(MachineInstr::NoFPExcept);
3523-
3524-
return true;
3525-
}
3526-
35273451
case TargetOpcode::G_FREEZE:
35283452
return selectCopy(I, TII, MRI, TRI, RBI);
35293453

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,19 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
618618
return hasFPConstraints(MI, MRI, TRI, Depth);
619619
}
620620

621+
bool AArch64RegisterBankInfo::prefersFPUse(const MachineInstr &MI,
622+
const MachineRegisterInfo &MRI,
623+
const TargetRegisterInfo &TRI,
624+
unsigned Depth) const {
625+
switch (MI.getOpcode()) {
626+
case TargetOpcode::G_SITOFP:
627+
case TargetOpcode::G_UITOFP:
628+
return MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() ==
629+
MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
630+
}
631+
return onlyDefinesFP(MI, MRI, TRI, Depth);
632+
}
633+
621634
bool AArch64RegisterBankInfo::isLoadFromFPType(const MachineInstr &MI) const {
622635
// GMemOperation because we also want to match indexed loads.
623636
auto *MemOp = cast<GMemOperation>(&MI);
@@ -826,7 +839,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
826839
// Integer to FP conversions don't necessarily happen between GPR -> FPR
827840
// regbanks. They can also be done within an FPR register.
828841
Register SrcReg = MI.getOperand(1).getReg();
829-
if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
842+
if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank &&
843+
MRI.getType(SrcReg).getSizeInBits() ==
844+
MRI.getType(MI.getOperand(0).getReg()).getSizeInBits())
830845
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
831846
else
832847
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
@@ -895,13 +910,13 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
895910
// instruction.
896911
//
897912
// Int->FP conversion operations are also captured in
898-
// onlyDefinesFP().
913+
// prefersFPUse().
899914

900915
if (isPHIWithFPConstraints(UseMI, MRI, TRI))
901916
return true;
902917

903918
return onlyUsesFP(UseMI, MRI, TRI) ||
904-
onlyDefinesFP(UseMI, MRI, TRI);
919+
prefersFPUse(UseMI, MRI, TRI);
905920
}))
906921
OpRegBankIdx[0] = PMI_FirstFPR;
907922
break;

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
139139
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
140140
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
141141

142+
/// \returns true if \p MI can take both fpr and gpr uses, but prefers fp.
143+
bool prefersFPUse(const MachineInstr &MI, const MachineRegisterInfo &MRI,
144+
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
145+
142146
/// \returns true if the load \p MI is likely loading from a floating-point
143147
/// type.
144148
bool isLoadFromFPType(const MachineInstr &MI) const;

llvm/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -357,31 +357,6 @@ body: |
357357
$d0 = COPY %1(s64)
358358
...
359359

360-
---
361-
name: sitofp_s64_s32_fpr_both
362-
legalized: true
363-
regBankSelected: true
364-
365-
registers:
366-
- { id: 0, class: fpr }
367-
- { id: 1, class: fpr }
368-
369-
body: |
370-
bb.0:
371-
liveins: $s0
372-
373-
; CHECK-LABEL: name: sitofp_s64_s32_fpr_both
374-
; CHECK: liveins: $s0
375-
; CHECK-NEXT: {{ $}}
376-
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
377-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
378-
; CHECK-NEXT: [[SCVTFUWDri:%[0-9]+]]:fpr64 = nofpexcept SCVTFUWDri [[COPY1]]
379-
; CHECK-NEXT: $d0 = COPY [[SCVTFUWDri]]
380-
%0(s32) = COPY $s0
381-
%1(s64) = G_SITOFP %0
382-
$d0 = COPY %1(s64)
383-
...
384-
385360
---
386361
name: sitofp_s64_s64_fpr
387362
legalized: true

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 27 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,6 @@
44
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16,CHECK-NOFP16-GI
55
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-FP16-GI
66

7-
; CHECK-FP16-GI: warning: Instruction selection used fallback path for stofp_load_i64_f16
8-
; CHECK-FP16-GI-NEXT: warning: Instruction selection used fallback path for utofp_load_i64_f16
9-
; CHECK-FP16-GI-NEXT: warning: Instruction selection used fallback path for stofp_load_i32_f16
10-
; CHECK-FP16-GI-NEXT: warning: Instruction selection used fallback path for utofp_load_i32_f16
11-
127
define double @stofp_i64_f64(i64 %a) {
138
; CHECK-LABEL: stofp_i64_f64:
149
; CHECK: // %bb.0: // %entry
@@ -844,31 +839,11 @@ entry:
844839
}
845840

846841
define double @stofp_load_i32_f64(ptr %p) {
847-
; CHECK-NOFP16-SD-LABEL: stofp_load_i32_f64:
848-
; CHECK-NOFP16-SD: // %bb.0: // %entry
849-
; CHECK-NOFP16-SD-NEXT: ldr w8, [x0]
850-
; CHECK-NOFP16-SD-NEXT: scvtf d0, w8
851-
; CHECK-NOFP16-SD-NEXT: ret
852-
;
853-
; CHECK-FP16-SD-LABEL: stofp_load_i32_f64:
854-
; CHECK-FP16-SD: // %bb.0: // %entry
855-
; CHECK-FP16-SD-NEXT: ldr w8, [x0]
856-
; CHECK-FP16-SD-NEXT: scvtf d0, w8
857-
; CHECK-FP16-SD-NEXT: ret
858-
;
859-
; CHECK-NOFP16-GI-LABEL: stofp_load_i32_f64:
860-
; CHECK-NOFP16-GI: // %bb.0: // %entry
861-
; CHECK-NOFP16-GI-NEXT: ldr s0, [x0]
862-
; CHECK-NOFP16-GI-NEXT: fmov w8, s0
863-
; CHECK-NOFP16-GI-NEXT: scvtf d0, w8
864-
; CHECK-NOFP16-GI-NEXT: ret
865-
;
866-
; CHECK-FP16-GI-LABEL: stofp_load_i32_f64:
867-
; CHECK-FP16-GI: // %bb.0: // %entry
868-
; CHECK-FP16-GI-NEXT: ldr s0, [x0]
869-
; CHECK-FP16-GI-NEXT: fmov w8, s0
870-
; CHECK-FP16-GI-NEXT: scvtf d0, w8
871-
; CHECK-FP16-GI-NEXT: ret
842+
; CHECK-LABEL: stofp_load_i32_f64:
843+
; CHECK: // %bb.0: // %entry
844+
; CHECK-NEXT: ldr w8, [x0]
845+
; CHECK-NEXT: scvtf d0, w8
846+
; CHECK-NEXT: ret
872847
entry:
873848
%a = load i32, ptr %p
874849
%c = sitofp i32 %a to double
@@ -936,63 +911,23 @@ entry:
936911
}
937912

938913
define float @stofp_load_i64_f32(ptr %p) {
939-
; CHECK-NOFP16-SD-LABEL: stofp_load_i64_f32:
940-
; CHECK-NOFP16-SD: // %bb.0: // %entry
941-
; CHECK-NOFP16-SD-NEXT: ldr x8, [x0]
942-
; CHECK-NOFP16-SD-NEXT: scvtf s0, x8
943-
; CHECK-NOFP16-SD-NEXT: ret
944-
;
945-
; CHECK-FP16-SD-LABEL: stofp_load_i64_f32:
946-
; CHECK-FP16-SD: // %bb.0: // %entry
947-
; CHECK-FP16-SD-NEXT: ldr x8, [x0]
948-
; CHECK-FP16-SD-NEXT: scvtf s0, x8
949-
; CHECK-FP16-SD-NEXT: ret
950-
;
951-
; CHECK-NOFP16-GI-LABEL: stofp_load_i64_f32:
952-
; CHECK-NOFP16-GI: // %bb.0: // %entry
953-
; CHECK-NOFP16-GI-NEXT: ldr d0, [x0]
954-
; CHECK-NOFP16-GI-NEXT: fmov x8, d0
955-
; CHECK-NOFP16-GI-NEXT: scvtf s0, x8
956-
; CHECK-NOFP16-GI-NEXT: ret
957-
;
958-
; CHECK-FP16-GI-LABEL: stofp_load_i64_f32:
959-
; CHECK-FP16-GI: // %bb.0: // %entry
960-
; CHECK-FP16-GI-NEXT: ldr d0, [x0]
961-
; CHECK-FP16-GI-NEXT: fmov x8, d0
962-
; CHECK-FP16-GI-NEXT: scvtf s0, x8
963-
; CHECK-FP16-GI-NEXT: ret
914+
; CHECK-LABEL: stofp_load_i64_f32:
915+
; CHECK: // %bb.0: // %entry
916+
; CHECK-NEXT: ldr x8, [x0]
917+
; CHECK-NEXT: scvtf s0, x8
918+
; CHECK-NEXT: ret
964919
entry:
965920
%a = load i64, ptr %p
966921
%c = sitofp i64 %a to float
967922
ret float %c
968923
}
969924

970925
define float @utofp_load_i64_f32(ptr %p) {
971-
; CHECK-NOFP16-SD-LABEL: utofp_load_i64_f32:
972-
; CHECK-NOFP16-SD: // %bb.0: // %entry
973-
; CHECK-NOFP16-SD-NEXT: ldr x8, [x0]
974-
; CHECK-NOFP16-SD-NEXT: ucvtf s0, x8
975-
; CHECK-NOFP16-SD-NEXT: ret
976-
;
977-
; CHECK-FP16-SD-LABEL: utofp_load_i64_f32:
978-
; CHECK-FP16-SD: // %bb.0: // %entry
979-
; CHECK-FP16-SD-NEXT: ldr x8, [x0]
980-
; CHECK-FP16-SD-NEXT: ucvtf s0, x8
981-
; CHECK-FP16-SD-NEXT: ret
982-
;
983-
; CHECK-NOFP16-GI-LABEL: utofp_load_i64_f32:
984-
; CHECK-NOFP16-GI: // %bb.0: // %entry
985-
; CHECK-NOFP16-GI-NEXT: ldr d0, [x0]
986-
; CHECK-NOFP16-GI-NEXT: fmov x8, d0
987-
; CHECK-NOFP16-GI-NEXT: ucvtf s0, x8
988-
; CHECK-NOFP16-GI-NEXT: ret
989-
;
990-
; CHECK-FP16-GI-LABEL: utofp_load_i64_f32:
991-
; CHECK-FP16-GI: // %bb.0: // %entry
992-
; CHECK-FP16-GI-NEXT: ldr d0, [x0]
993-
; CHECK-FP16-GI-NEXT: fmov x8, d0
994-
; CHECK-FP16-GI-NEXT: ucvtf s0, x8
995-
; CHECK-FP16-GI-NEXT: ret
926+
; CHECK-LABEL: utofp_load_i64_f32:
927+
; CHECK: // %bb.0: // %entry
928+
; CHECK-NEXT: ldr x8, [x0]
929+
; CHECK-NEXT: ucvtf s0, x8
930+
; CHECK-NEXT: ret
996931
entry:
997932
%a = load i64, ptr %p
998933
%c = uitofp i64 %a to float
@@ -1072,53 +1007,37 @@ entry:
10721007
}
10731008

10741009
define half @stofp_load_i64_f16(ptr %p) {
1075-
; CHECK-NOFP16-SD-LABEL: stofp_load_i64_f16:
1076-
; CHECK-NOFP16-SD: // %bb.0: // %entry
1077-
; CHECK-NOFP16-SD-NEXT: ldr x8, [x0]
1078-
; CHECK-NOFP16-SD-NEXT: scvtf s0, x8
1079-
; CHECK-NOFP16-SD-NEXT: fcvt h0, s0
1080-
; CHECK-NOFP16-SD-NEXT: ret
1010+
; CHECK-NOFP16-LABEL: stofp_load_i64_f16:
1011+
; CHECK-NOFP16: // %bb.0: // %entry
1012+
; CHECK-NOFP16-NEXT: ldr x8, [x0]
1013+
; CHECK-NOFP16-NEXT: scvtf s0, x8
1014+
; CHECK-NOFP16-NEXT: fcvt h0, s0
1015+
; CHECK-NOFP16-NEXT: ret
10811016
;
10821017
; CHECK-FP16-LABEL: stofp_load_i64_f16:
10831018
; CHECK-FP16: // %bb.0: // %entry
10841019
; CHECK-FP16-NEXT: ldr x8, [x0]
10851020
; CHECK-FP16-NEXT: scvtf h0, x8
10861021
; CHECK-FP16-NEXT: ret
1087-
;
1088-
; CHECK-NOFP16-GI-LABEL: stofp_load_i64_f16:
1089-
; CHECK-NOFP16-GI: // %bb.0: // %entry
1090-
; CHECK-NOFP16-GI-NEXT: ldr d0, [x0]
1091-
; CHECK-NOFP16-GI-NEXT: fmov x8, d0
1092-
; CHECK-NOFP16-GI-NEXT: scvtf s0, x8
1093-
; CHECK-NOFP16-GI-NEXT: fcvt h0, s0
1094-
; CHECK-NOFP16-GI-NEXT: ret
10951022
entry:
10961023
%a = load i64, ptr %p
10971024
%c = sitofp i64 %a to half
10981025
ret half %c
10991026
}
11001027

11011028
define half @utofp_load_i64_f16(ptr %p) {
1102-
; CHECK-NOFP16-SD-LABEL: utofp_load_i64_f16:
1103-
; CHECK-NOFP16-SD: // %bb.0: // %entry
1104-
; CHECK-NOFP16-SD-NEXT: ldr x8, [x0]
1105-
; CHECK-NOFP16-SD-NEXT: ucvtf s0, x8
1106-
; CHECK-NOFP16-SD-NEXT: fcvt h0, s0
1107-
; CHECK-NOFP16-SD-NEXT: ret
1029+
; CHECK-NOFP16-LABEL: utofp_load_i64_f16:
1030+
; CHECK-NOFP16: // %bb.0: // %entry
1031+
; CHECK-NOFP16-NEXT: ldr x8, [x0]
1032+
; CHECK-NOFP16-NEXT: ucvtf s0, x8
1033+
; CHECK-NOFP16-NEXT: fcvt h0, s0
1034+
; CHECK-NOFP16-NEXT: ret
11081035
;
11091036
; CHECK-FP16-LABEL: utofp_load_i64_f16:
11101037
; CHECK-FP16: // %bb.0: // %entry
11111038
; CHECK-FP16-NEXT: ldr x8, [x0]
11121039
; CHECK-FP16-NEXT: ucvtf h0, x8
11131040
; CHECK-FP16-NEXT: ret
1114-
;
1115-
; CHECK-NOFP16-GI-LABEL: utofp_load_i64_f16:
1116-
; CHECK-NOFP16-GI: // %bb.0: // %entry
1117-
; CHECK-NOFP16-GI-NEXT: ldr d0, [x0]
1118-
; CHECK-NOFP16-GI-NEXT: fmov x8, d0
1119-
; CHECK-NOFP16-GI-NEXT: ucvtf s0, x8
1120-
; CHECK-NOFP16-GI-NEXT: fcvt h0, s0
1121-
; CHECK-NOFP16-GI-NEXT: ret
11221041
entry:
11231042
%a = load i64, ptr %p
11241043
%c = uitofp i64 %a to half

0 commit comments

Comments
 (0)