Skip to content

Commit 868f23f

Browse files
authored
[LoongArch] Override hooks to enable sink-and-fold support in MachineSink (#163721)
Add the option `loongarch-enable-sink-fold` to enable sink-and-fold in MachineSink; it defaults to `true`. With these hooks, an `addi` followed by a `load/store` can be folded into a single `load/store` that carries the combined offset.
1 parent 49f55f4 commit 868f23f

File tree

6 files changed

+208
-62
lines changed

6 files changed

+208
-62
lines changed

llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,155 @@ LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
756756
return ArrayRef(TargetFlags);
757757
}
758758

759+
// Decide whether the immediate added by AddrI can be merged into the
// immediate offset (operand 2) of the memory instruction MemI.
//
// MemI  - memory instruction; its opcode selects which offset encoding is
//         checked, and operand 2 is read as its current immediate offset.
// Reg   - register compared against operand 0 of MemI; folding is refused
//         when they are equal. Presumably this is the address register
//         produced by AddrI -- confirm against the MachineSink caller.
// AddrI - candidate address computation; only ADDI_W / ADDI_D with a register
//         operand 1 and an immediate operand 2 are accepted.
// AM    - written only on success: BaseReg = operand 1 of AddrI, no scaled
//         register, Displacement = combined offset, Form = Basic.
//
// Returns true (and fills AM) when the combined offset passes the range check
// for MemI's offset encoding; returns false and leaves AM untouched otherwise.
bool LoongArchInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
760+
Register Reg,
761+
const MachineInstr &AddrI,
762+
ExtAddrMode &AM) const {
763+
// Offset encodings of the supported memory instructions. Further down, an
// ImmN kind is validated with isInt<N> and an ImmNShiftS kind with
// isShiftedInt<N, S>.
enum MemIOffsetType {
764+
Imm14Shift2,
765+
Imm12,
766+
Imm11Shift1,
767+
Imm10Shift2,
768+
Imm9Shift3,
769+
Imm8,
770+
Imm8Shift1,
771+
Imm8Shift2,
772+
Imm8Shift3
773+
};
774+
775+
MemIOffsetType OT;
776+
// Classify MemI by opcode; any opcode not listed here is never folded.
switch (MemI.getOpcode()) {
777+
default:
778+
return false;
779+
case LoongArch::LDPTR_W:
780+
case LoongArch::LDPTR_D:
781+
case LoongArch::STPTR_W:
782+
case LoongArch::STPTR_D:
783+
OT = Imm14Shift2;
784+
break;
785+
case LoongArch::LD_B:
786+
case LoongArch::LD_H:
787+
case LoongArch::LD_W:
788+
case LoongArch::LD_D:
789+
case LoongArch::LD_BU:
790+
case LoongArch::LD_HU:
791+
case LoongArch::LD_WU:
792+
case LoongArch::ST_B:
793+
case LoongArch::ST_H:
794+
case LoongArch::ST_W:
795+
case LoongArch::ST_D:
796+
case LoongArch::FLD_S:
797+
case LoongArch::FLD_D:
798+
case LoongArch::FST_S:
799+
case LoongArch::FST_D:
800+
case LoongArch::VLD:
801+
case LoongArch::VST:
802+
case LoongArch::XVLD:
803+
case LoongArch::XVST:
804+
case LoongArch::VLDREPL_B:
805+
case LoongArch::XVLDREPL_B:
806+
OT = Imm12;
807+
break;
808+
case LoongArch::VLDREPL_H:
809+
case LoongArch::XVLDREPL_H:
810+
OT = Imm11Shift1;
811+
break;
812+
case LoongArch::VLDREPL_W:
813+
case LoongArch::XVLDREPL_W:
814+
OT = Imm10Shift2;
815+
break;
816+
case LoongArch::VLDREPL_D:
817+
case LoongArch::XVLDREPL_D:
818+
OT = Imm9Shift3;
819+
break;
820+
case LoongArch::VSTELM_B:
821+
case LoongArch::XVSTELM_B:
822+
OT = Imm8;
823+
break;
824+
case LoongArch::VSTELM_H:
825+
case LoongArch::XVSTELM_H:
826+
OT = Imm8Shift1;
827+
break;
828+
case LoongArch::VSTELM_W:
829+
case LoongArch::XVSTELM_W:
830+
OT = Imm8Shift2;
831+
break;
832+
case LoongArch::VSTELM_D:
833+
case LoongArch::XVSTELM_D:
834+
OT = Imm8Shift3;
835+
break;
836+
}
837+
838+
// Refuse to fold when operand 0 of MemI is Reg itself.
// NOTE(review): presumably this covers a store whose stored value is the
// address register, which must stay live -- confirm.
if (MemI.getOperand(0).getReg() == Reg)
839+
return false;
840+
841+
// Only an ADDI_W / ADDI_D of the form "reg + immediate" can be absorbed.
if ((AddrI.getOpcode() != LoongArch::ADDI_W &&
842+
AddrI.getOpcode() != LoongArch::ADDI_D) ||
843+
!AddrI.getOperand(1).isReg() || !AddrI.getOperand(2).isImm())
844+
return false;
845+
846+
// Combined offset = MemI's current offset + AddrI's immediate.
// NOTE(review): operand 2 of MemI is read with getImm() without an isImm()
// check -- this assumes it is always an immediate at this point; confirm.
int64_t OldOffset = MemI.getOperand(2).getImm();
847+
int64_t Disp = AddrI.getOperand(2).getImm();
848+
int64_t NewOffset = OldOffset + Disp;
849+
// When the subtarget is not 64-bit, sign-extend the sum from 32 bits.
if (!STI.is64Bit())
850+
NewOffset = SignExtend64<32>(NewOffset);
851+
852+
// Reject unless NewOffset passes the range check matching OT. The
// Imm14Shift2 kind additionally requires STI.hasUAL().
if (!(OT == Imm14Shift2 && isShiftedInt<14, 2>(NewOffset) && STI.hasUAL()) &&
853+
!(OT == Imm12 && isInt<12>(NewOffset)) &&
854+
!(OT == Imm11Shift1 && isShiftedInt<11, 1>(NewOffset)) &&
855+
!(OT == Imm10Shift2 && isShiftedInt<10, 2>(NewOffset)) &&
856+
!(OT == Imm9Shift3 && isShiftedInt<9, 3>(NewOffset)) &&
857+
!(OT == Imm8 && isInt<8>(NewOffset)) &&
858+
!(OT == Imm8Shift1 && isShiftedInt<8, 1>(NewOffset)) &&
859+
!(OT == Imm8Shift2 && isShiftedInt<8, 2>(NewOffset)) &&
860+
!(OT == Imm8Shift3 && isShiftedInt<8, 3>(NewOffset)))
861+
return false;
862+
863+
// Describe the folded address as "base register + displacement" with no
// scaled register.
AM.BaseReg = AddrI.getOperand(1).getReg();
864+
AM.ScaledReg = 0;
865+
AM.Scale = 0;
866+
AM.Displacement = NewOffset;
867+
AM.Form = ExtAddrMode::Formula::Basic;
868+
return true;
869+
}
870+
871+
// Build a new memory instruction with the same opcode as MemI that addresses
// memory through AM.BaseReg + AM.Displacement.
//
// MemI - memory instruction to re-emit; it is passed to BuildMI as the
//        insertion point in its parent block and supplies the debug location,
//        operand 0, memory operands and MI flags. MemI is not erased here.
// AM   - addressing mode to use; only the form without a scaled register
//        (ScaledReg == 0 and Scale == 0) is supported, which is asserted.
//
// Returns the instruction produced by the BuildMI chain.
MachineInstr *
872+
LoongArchInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
873+
const ExtAddrMode &AM) const {
874+
const DebugLoc &DL = MemI.getDebugLoc();
875+
MachineBasicBlock &MBB = *MemI.getParent();
876+
877+
assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
878+
"Addressing mode not supported for folding");
879+
880+
unsigned MemIOp = MemI.getOpcode();
881+
switch (MemIOp) {
882+
// Generic shape: operand 0, base register, displacement. Operand 0 is
// flagged as a definition only when MemI may load.
default:
883+
return BuildMI(MBB, MemI, DL, get(MemIOp))
884+
.addReg(MemI.getOperand(0).getReg(),
885+
MemI.mayLoad() ? RegState::Define : 0)
886+
.addReg(AM.BaseReg)
887+
.addImm(AM.Displacement)
888+
.setMemRefs(MemI.memoperands())
889+
.setMIFlags(MemI.getFlags());
890+
// The VSTELM / XVSTELM variants carry one more immediate (operand 3 of
// MemI), which is copied over after the displacement.
// NOTE(review): presumably the element index -- confirm against the
// instruction definitions.
case LoongArch::VSTELM_B:
891+
case LoongArch::VSTELM_H:
892+
case LoongArch::VSTELM_W:
893+
case LoongArch::VSTELM_D:
894+
case LoongArch::XVSTELM_B:
895+
case LoongArch::XVSTELM_H:
896+
case LoongArch::XVSTELM_W:
897+
case LoongArch::XVSTELM_D:
898+
return BuildMI(MBB, MemI, DL, get(MemIOp))
899+
.addReg(MemI.getOperand(0).getReg(), 0)
900+
.addReg(AM.BaseReg)
901+
.addImm(AM.Displacement)
902+
.addImm(MemI.getOperand(3).getImm())
903+
.setMemRefs(MemI.memoperands())
904+
.setMIFlags(MemI.getFlags());
905+
}
906+
}
907+
759908
// Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
760909
bool LoongArch::isSEXT_W(const MachineInstr &MI) {
761910
return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&

llvm/lib/Target/LoongArch/LoongArchInstrInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
9393
ArrayRef<std::pair<unsigned, const char *>>
9494
getSerializableBitmaskMachineOperandTargetFlags() const override;
9595

96+
// Override: reports whether the ADDI_W / ADDI_D instruction AddrI can be
// folded into the immediate offset of the memory instruction MemI; on success
// the resulting base register and displacement are written to AM.
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
97+
const MachineInstr &AddrI,
98+
ExtAddrMode &AM) const override;
99+
// Override: builds a memory instruction with MemI's opcode that uses
// AM.BaseReg and AM.Displacement as its address, and returns it.
MachineInstr *emitLdStWithAddr(MachineInstr &MemI,
100+
const ExtAddrMode &AM) const override;
101+
96102
protected:
97103
const LoongArchSubtarget &STI;
98104
};

llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ static cl::opt<bool>
6262
cl::desc("Enable the merge base offset pass"),
6363
cl::init(true), cl::Hidden);
6464

65+
// Hidden command-line flag "loongarch-enable-sink-fold", initialised to true.
// Its value is handed to setEnableSinkAndFold() in the LoongArchPassConfig
// constructor.
static cl::opt<bool>
66+
EnableSinkFold("loongarch-enable-sink-fold",
67+
cl::desc("Enable sinking and folding of instruction copies"),
68+
cl::init(true), cl::Hidden);
69+
6570
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
6671
return RM.value_or(Reloc::Static);
6772
}
@@ -146,7 +151,9 @@ namespace {
146151
class LoongArchPassConfig : public TargetPassConfig {
147152
public:
148153
LoongArchPassConfig(LoongArchTargetMachine &TM, PassManagerBase &PM)
149-
: TargetPassConfig(TM, PM) {}
154+
: TargetPassConfig(TM, PM) {
155+
setEnableSinkAndFold(EnableSinkFold);
156+
}
150157

151158
LoongArchTargetMachine &getLoongArchTargetMachine() const {
152159
return getTM<LoongArchTargetMachine>();

llvm/test/CodeGen/LoongArch/ldptr.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ define signext i32 @ldptr_w(ptr %p) nounwind {
2424
; LA32-LABEL: ldptr_w:
2525
; LA32: # %bb.0: # %entry
2626
; LA32-NEXT: addi.w $a0, $a0, 2047
27-
; LA32-NEXT: addi.w $a0, $a0, 1
28-
; LA32-NEXT: ld.w $a0, $a0, 0
27+
; LA32-NEXT: ld.w $a0, $a0, 1
2928
; LA32-NEXT: ret
3029
;
3130
; LA64-LABEL: ldptr_w:
@@ -81,10 +80,9 @@ entry:
8180
define i64 @ldptr_d(ptr %p) nounwind {
8281
; LA32-LABEL: ldptr_d:
8382
; LA32: # %bb.0: # %entry
84-
; LA32-NEXT: addi.w $a0, $a0, 2047
85-
; LA32-NEXT: addi.w $a1, $a0, 1
86-
; LA32-NEXT: ld.w $a0, $a1, 0
87-
; LA32-NEXT: ld.w $a1, $a1, 4
83+
; LA32-NEXT: addi.w $a1, $a0, 2047
84+
; LA32-NEXT: ld.w $a0, $a1, 1
85+
; LA32-NEXT: ld.w $a1, $a1, 5
8886
; LA32-NEXT: ret
8987
;
9088
; LA64-LABEL: ldptr_d:

0 commit comments

Comments
 (0)