Skip to content

Commit 036bc79

Browse files
committed
[AArch64][GlobalISel] Assign FPR banks to loads which are used by integer->float conversions.
On AArch64, scalar G_[US]ITOFP instructions can take their source operand on either the gpr or the fpr bank. Because of this, when the source of such a conversion is a load, that load can be assigned to the fpr bank, which avoids a cross-bank gpr->fpr copy. Differential Revision: https://reviews.llvm.org/D94701
1 parent d6acf3c commit 036bc79

File tree

2 files changed

+56
-4
lines changed

2 files changed

+56
-4
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -680,11 +680,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
680680
break;
681681
}
682682
case TargetOpcode::G_SITOFP:
683-
case TargetOpcode::G_UITOFP:
683+
case TargetOpcode::G_UITOFP: {
684684
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
685685
break;
686-
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
686+
// Integer to FP conversions don't necessarily happen between GPR -> FPR
687+
// regbanks. They can also be done within an FPR register.
688+
Register SrcReg = MI.getOperand(1).getReg();
689+
if (getRegBank(SrcReg, MRI, TRI) == &AArch64::FPRRegBank)
690+
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
691+
else
692+
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
687693
break;
694+
}
688695
case TargetOpcode::G_FPTOSI:
689696
case TargetOpcode::G_FPTOUI:
690697
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
@@ -722,7 +729,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
722729
// assume this was a floating point load in the IR.
723730
// If it was not, we would have had a bitcast before
724731
// reaching that instruction.
725-
if (onlyUsesFP(UseMI, MRI, TRI)) {
732+
// Int->FP conversion operations are also captured in onlyDefinesFP().
733+
if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
726734
OpRegBankIdx[0] = PMI_FirstFPR;
727735
break;
728736
}

llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Check that we correctly assign register banks based off of instructions which
55
# only use or only define FPRs.
66
#
7-
# For example, G_SITOFP takes in a GPR, but only ever produces values on FPRs.
7+
# For example, G_SITOFP may take in a GPR, but only ever produces values on FPRs.
88
# Some instructions can have inputs/outputs on either FPRs or GPRs. If one of
99
# those instructions takes in the result of a G_SITOFP as a source, we should
1010
# put that source on a FPR.
@@ -361,3 +361,47 @@ body: |
361361
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
362362
$s0 = COPY %phi(s32)
363363
RET_ReallyLR implicit $s0
364+
365+
...
366+
---
367+
name: load_used_by_sitofp
368+
legalized: true
369+
tracksRegLiveness: true
370+
body: |
371+
bb.0:
372+
liveins: $x0
373+
; The load should be assigned an fpr bank because it's used by the sitofp.
374+
; The sitofp should assign both src and dest to FPR, resulting in no copies.
375+
; CHECK-LABEL: name: load_used_by_sitofp
376+
; CHECK: liveins: $x0
377+
; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
378+
; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4)
379+
; CHECK: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[LOAD]](s32)
380+
; CHECK: $s0 = COPY [[SITOFP]](s32)
381+
; CHECK: RET_ReallyLR implicit $s0
382+
%0:_(p0) = COPY $x0
383+
%1:_(s32) = G_LOAD %0 :: (load 4)
384+
%2:_(s32) = G_SITOFP %1:_(s32)
385+
$s0 = COPY %2(s32)
386+
RET_ReallyLR implicit $s0
387+
...
388+
---
389+
name: load_used_by_uitofp
390+
legalized: true
391+
tracksRegLiveness: true
392+
body: |
393+
bb.0:
394+
liveins: $x0
395+
; CHECK-LABEL: name: load_used_by_uitofp
396+
; CHECK: liveins: $x0
397+
; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
398+
; CHECK: [[LOAD:%[0-9]+]]:fpr(s32) = G_LOAD [[COPY]](p0) :: (load 4)
399+
; CHECK: [[UITOFP:%[0-9]+]]:fpr(s32) = G_UITOFP [[LOAD]](s32)
400+
; CHECK: $s0 = COPY [[UITOFP]](s32)
401+
; CHECK: RET_ReallyLR implicit $s0
402+
%0:_(p0) = COPY $x0
403+
%1:_(s32) = G_LOAD %0 :: (load 4)
404+
%2:_(s32) = G_UITOFP %1:_(s32)
405+
$s0 = COPY %2(s32)
406+
RET_ReallyLR implicit $s0
407+
...

0 commit comments

Comments
 (0)