Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,17 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
SRAWI_rec
SRAWI_rec,
SRAWI8_rec
)>;

// Single crack instructions
// 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
SRAW_rec
SRAW_rec,
SRAW8_rec
)>;

// 2-way crack instructions
Expand Down Expand Up @@ -926,7 +928,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
SETNBC, SETNBC8,
SETNBCR, SETNBCR8,
SRADI, SRADI_32,
SRAWI,
SRAWI, SRAWI8,
SUBFIC, SUBFIC8,
SUBFME, SUBFME8,
SUBFME8O, SUBFMEO,
Expand Down Expand Up @@ -1008,7 +1010,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
SLD,
SLW, SLW8,
SRAD,
SRAW,
SRAW, SRAW8,
SRD,
SRW, SRW8,
SUBF, SUBF8,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/PowerPC/P9InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instregex "F(N)?ABS(D|S)$"),
(instregex "FNEG(D|S)$"),
(instregex "FCPSGN(D|S)$"),
(instregex "SRAW(I)?$"),
(instregex "SRAW(8)?$"),
(instregex "SRAWI(8)?$"),
(instregex "ISEL(8)?$"),
RLDIMI,
XSIEXPDP,
Expand Down Expand Up @@ -1091,7 +1092,8 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
(instregex "RLD(I)?C(R|L)_rec$"),
(instregex "RLW(IMI|INM|NM)(8)?_rec$"),
(instregex "SLW(8)?_rec$"),
(instregex "SRAW(I)?_rec$"),
(instregex "SRAW(8)?_rec$"),
(instregex "SRAWI(8)?_rec$"),
(instregex "SRW(8)?_rec$"),
RLDICL_32_rec,
RLDIMI_rec
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/PowerPC/PPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,18 @@ def getAltVSXFMAOpcode : InstrMapping {
let ValueCols = [["1"]];
}

def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
let FilterClass = "SExt32To64";
// Instructions with the same opcode.
let RowFields = ["Inst"];
// Instructions with the same Interpretation64Bit value form a column.
let ColFields = ["Interpretation64Bit"];
// The key column are not the Interpretation64Bit-form instructions.
let KeyCol = ["0"];
// Value columns are the Interpretation64Bit-form instructions.
let ValueCols = [["1"]];
}

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,14 @@ defm SLW8 : XForm_6r<31, 24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
defm SRW8 : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;

defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
[]>, SExt32To64;

defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
"srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;

} // Interpretation64Bit

// For fast-isel:
Expand Down
212 changes: 212 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5234,6 +5234,218 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

// This function will promote the instruction which defines the register `Reg`
// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
// used to check whether an instruction needs to be promoted or not is similar
// to the logic used to check whether or not a defined register is sign or zero
// extended within the function PPCInstrInfo::isSignOrZeroExtended.
// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I can see below, BinOpDepth is only ever used for AND8 and PHI instr processing and only ever incremented when processing a PHI. Maybe I am missing something or this documentation need to be updated to be more clear. Do we not care to track how many times it calls itself for other instructions?

Copy link
Contributor Author

@diggerlin diggerlin Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good Catch for AND8 . the logic of BinOpDepth is come from function isSignOrZeroExtended(), when the instructions(e.g., AND, PHI, OR) which has two or more source registers , the instructions which defines these source registers also need to be promoted and so on , without increase the BinOpDepth , it maybe have 2 * 2 * 2 * 2.... recursions of promoteInstr32To64ForElimEXTSW.

// than once. This is done to prevent exponential recursion.
void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const {
if (!Reg.isVirtual())
return;

MachineInstr *MI = MRI->getVRegDef(Reg);
if (!MI)
return;

unsigned Opcode = MI->getOpcode();
bool IsNonSignedExtInstrNeedPromoted = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of having a flag here, maybe we can setup a static function instead to replace the lambda CheckAndSetNewOpcod

// switch stmt to check opcode and set to new if needed and return true or false 
static bool SignedExtInstrPromotion(unsigned int &Opcode) {
  switch (Opcode) {
    default: return false;
    case ....

 return true;
}

Copy link
Contributor Author

@diggerlin diggerlin Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of using static helper function and I do not want a duplication code, I use a Macro definition to achieve it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO the macro seem more confusing to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I change to another way

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This var is a bit of a mouth full and took me a few tries to understand what it means. Maybe a simpler var name: PromoteNonSignedExtInstr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it maybe better to change to HasNonSignedExtInstrPromoted

int NewOpcode = -1;

#define MapOpCode(A) \
case A: \
NewOpcode = A##8; \
IsNonSignedExtInstrNeedPromoted = true; \
break

switch (Opcode) {
MapOpCode(PPC::OR);
MapOpCode(PPC::ISEL);
MapOpCode(PPC::ORI);
MapOpCode(PPC::XORI);
MapOpCode(PPC::ORIS);
MapOpCode(PPC::XORIS);
MapOpCode(PPC::AND);
}
#undef MapOpCode

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: remove empty line

switch (Opcode) {
case PPC::OR:
case PPC::ISEL:
case PPC::OR8:
case PPC::PHI:
if (BinOpDepth < MAX_BINOP_DEPTH) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

early exit

Suggested change
if (BinOpDepth < MAX_BINOP_DEPTH) {
if (BinOpDepth >= MAX_BINOP_DEPTH)
break;

unsigned OperandEnd = 3, OperandStride = 1;
if (Opcode == PPC::PHI) {
OperandEnd = MI->getNumOperands();
OperandStride = 2;
}

for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
assert(MI->getOperand(I).isReg() && "Operand must be register");
Register SrcReg = MI->getOperand(I).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
}
}
break;
case PPC::COPY: {
// Refers to the logic of the `case PPC::COPY` statement in the function
// PPCInstrInfo::isSignOrZeroExtended().

Register SrcReg = MI->getOperand(1).getReg();
// In both ELFv1 and v2 ABI, method parameters and the return value
// are sign- or zero-extended.
const MachineFunction *MF = MI->getMF();
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should copy the explanation comments for these conditions from issignorzeroextended() as well

// If this is a copy from another register, we recursively promote the
// source.
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
return;
}

// From here on everything is SVR4ABI. COPY will be eliminated in the other
// pass, we do not need promote the COPY pseudo opcode.

if (SrcReg != PPC::X3)
// If this is a copy from another register, we recursively promote the
// source.
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
return;
}
case PPC::ORI:
case PPC::XORI:
case PPC::ORIS:
case PPC::XORIS:
case PPC::ORI8:
case PPC::XORI8:
case PPC::ORIS8:
case PPC::XORIS8: {
Register SrcReg = MI->getOperand(1).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need for tmp var. Can inline into call.

break;
}
case PPC::AND:
case PPC::AND8: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

brace not needed here.

if (BinOpDepth < MAX_BINOP_DEPTH) {
Register SrcReg1 = MI->getOperand(1).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg1, MRI, BinOpDepth + 1, LV);
Register SrcReg2 = MI->getOperand(2).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg2, MRI, BinOpDepth + 1, LV);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tmp var SrcReg[12] is not needed. Prefer to inline it into the call.

}
break;
}
}

const PPCInstrInfo *TII =
MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
if (!TII->isSExt32To64(Opcode) && !IsNonSignedExtInstrNeedPromoted)
return;

const TargetRegisterClass *RC = MRI->getRegClass(Reg);

if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
return;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is Reg updated anywhere above? I don't see seem to see any updates so am a bit confused why this check is here vs at the top. Can we add a comment here as to why we are exiting earlier?

nit: remove empty line 5350

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can not move the checking to top, because even the RegClass of the reg is PPC::G8RCRegClass or PPC::G8RC_and_G8RC_NOX0RegClass , we still need to check whether we need to logic in the switch statement

for example, COPY from PPC::GPRCRegClass to PPC::G8RCRegClass, we still need to do promoteInstr32To64ForElimEXTSW


// The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
// map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
// instruction with the same opcode.
if (!IsNonSignedExtInstrNeedPromoted)
NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);

assert(NewOpcode != -1 &&
"Must have a 64-bit opcode to map the 32-bit opcode!");

const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const MCInstrDesc &MCID = TII->get(NewOpcode);
const TargetRegisterClass *NewRC =
TRI->getRegClass(MCID.operands()[0].RegClass);

Register SrcReg = MI->getOperand(0).getReg();
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

// If the register class of the defined register in the 32-bit instruction
// is the same as the register class of the defined register in the promoted
// 64-bit instruction, we do not need to promote the instruction.
if (NewRC == SrcRC)
return;

DebugLoc DL = MI->getDebugLoc();
auto MBB = MI->getParent();

// Since the pseudo-opcode of the instruction is promoted from 32-bit to
// 64-bit, if the source reg class of the original instruction belongs to
// PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
// the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
// respectively.
DenseMap<unsigned, Register> PromoteRegs;
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
MachineOperand &Operand = MI->getOperand(i);
if (!Operand.isReg())
continue;

Register OperandReg = Operand.getReg();
if (!OperandReg.isVirtual())
continue;

const TargetRegisterClass *NewUsedRegRC =
TRI->getRegClass(MCID.operands()[i].RegClass);
const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
// Promote the used 32-bit register to 64-bit register.
Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
.addReg(TmpReg)
.addReg(OperandReg)
.addImm(PPC::sub_32);
PromoteRegs[i] = DstTmpReg;
}
}

Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the indentation from here to return is a bit off.


BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
MachineBasicBlock::instr_iterator Iter(MI);
--Iter;
MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
if (PromoteRegs.find(i) != PromoteRegs.end())
MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
else
Iter->addOperand(MI->getOperand(i));
}

for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
MachineOperand &Operand = Iter->getOperand(i);
if (!Operand.isReg())
continue;
Register OperandReg = Operand.getReg();
if (!OperandReg.isVirtual())
continue;
LV->recomputeForSingleDefVirtReg(OperandReg);
}

MI->eraseFromParent();

// A defined register may be used by other instructions that are 32-bit.
// After the defined register is promoted to 64-bit for the promoted
// instruction, we need to demote the 64-bit defined register back to a
// 32-bit register
BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
.addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
LV->recomputeForSingleDefVirtReg(NewDefinedReg);
return;
}

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
Expand Down Expand Up @@ -624,6 +625,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const MachineRegisterInfo *MRI) const {
return isSignOrZeroExtended(Reg, 0, MRI).second;
}
void promoteInstr32To64ForElimEXTSW(const Register &Reg,
MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const;

bool convertToImmediateForm(MachineInstr &MI,
SmallSet<Register, 4> &RegsToUpdate,
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,16 @@ bool PPCMIPeephole::simplifyCode() {
} else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
// sign-extended. However, we are not sure whether a spill will occur
// during register allocation. If there is no promotion, it will use
// 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
// since the register class is 32-bits. Consequently, the high 32-bit
// information will be lost. Therefore, all these instructions in the
// chain used to deduce sign extension to eliminate the 'extsw' will
// need to be promoted to 64-bit pseudo instructions when the 'extsw'
// is eliminated.
TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);

LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
Register TmpReg =
MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCScheduleP7.td
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ let SchedModel = P7Model in {
RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAWI8, SRAWI8_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
)>;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ body: |
%2 = LI 48
%5 = COPY %0.sub_32
%8 = SRW killed %5, killed %2
; CHECK: LI 0
; CHECK: LI8 0
; CHECK-LATE: li 3, 0
$x3 = EXTSW_32_64 %8
BLR8 implicit $lr8, implicit $rm, implicit $x3
Expand Down Expand Up @@ -722,7 +722,7 @@ body: |
%3 = COPY %0.sub_32
%4 = SRAW killed %3, killed %2, implicit-def dead $carry
; CHECK: LI 48
; CHECK: SRAW killed %3, killed %2, implicit-def dead $carry
; CHECK: SRAW8 killed %7, killed %9, implicit-def $carry, implicit-def dead $carry
; CHECK-LATE: sraw 3, 3, 4
%5 = EXTSW_32_64 killed %4
$x3 = COPY %5
Expand Down Expand Up @@ -779,7 +779,7 @@ body: |
%2 = LI 80
%3 = COPY %0.sub_32
%4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
; CHECK: SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
; CHECK: SRAW8_rec killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
; CHECK-LATE: sraw. 3, 3, 4
%5 = COPY killed $cr0
%6 = ISEL %2, %4, %5.sub_eq
Expand Down
Loading
Loading