Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions llvm/lib/Target/PowerPC/P10InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,17 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
SRADI_rec,
SRAWI_rec
SRAWI_rec,
SRAWI8_rec
)>;

// Single crack instructions
// 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
SRAW_rec
SRAW_rec,
SRAW8_rec
)>;

// 2-way crack instructions
Expand Down Expand Up @@ -926,7 +928,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
SETNBC, SETNBC8,
SETNBCR, SETNBCR8,
SRADI, SRADI_32,
SRAWI,
SRAWI, SRAWI8,
SUBFIC, SUBFIC8,
SUBFME, SUBFME8,
SUBFME8O, SUBFMEO,
Expand Down Expand Up @@ -1008,7 +1010,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
SLD,
SLW, SLW8,
SRAD,
SRAW,
SRAW, SRAW8,
SRD,
SRW, SRW8,
SUBF, SUBF8,
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/PowerPC/P9InstrResources.td
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
(instregex "F(N)?ABS(D|S)$"),
(instregex "FNEG(D|S)$"),
(instregex "FCPSGN(D|S)$"),
(instregex "SRAW(I)?$"),
(instregex "SRAW(8)?$"),
(instregex "SRAWI(8)?$"),
(instregex "ISEL(8)?$"),
RLDIMI,
XSIEXPDP,
Expand Down Expand Up @@ -1091,7 +1092,8 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
(instregex "RLD(I)?C(R|L)_rec$"),
(instregex "RLW(IMI|INM|NM)(8)?_rec$"),
(instregex "SLW(8)?_rec$"),
(instregex "SRAW(I)?_rec$"),
(instregex "SRAW(8)?_rec$"),
(instregex "SRAWI(8)?_rec$"),
(instregex "SRW(8)?_rec$"),
RLDICL_32_rec,
RLDIMI_rec
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/PowerPC/PPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,18 @@ def getAltVSXFMAOpcode : InstrMapping {
let ValueCols = [["1"]];
}

def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
let FilterClass = "SExt32To64";
// Instructions with the same opcode.
let RowFields = ["Inst"];
// Instructions with the same Interpretation64Bit value form a column.
let ColFields = ["Interpretation64Bit"];
// The key column are not the Interpretation64Bit-form instructions.
let KeyCol = ["0"];
// Value columns are the Interpretation64Bit-form instructions.
let ValueCols = [["1"]];
}

//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstr64Bit.td
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,14 @@ defm SLW8 : XForm_6r<31, 24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
defm SRW8 : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;

defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
[]>, SExt32To64;

defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
"srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;

} // Interpretation64Bit

// For fast-isel:
Expand Down
218 changes: 218 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5234,6 +5234,224 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

// This function will promote the instruction which defines the register `Reg`
// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
// used to check whether an instruction needs to be promoted or not is similar
// to the logic used to check whether or not a defined register is sign or zero
// extended within the function PPCInstrInfo::isSignOrZeroExtended.
// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I can see below, BinOpDepth is only ever used for AND8 and PHI instr processing and only ever incremented when processing a PHI. Maybe I am missing something or this documentation need to be updated to be more clear. Do we not care to track how many times it calls itself for other instructions?

Copy link
Contributor Author

@diggerlin diggerlin Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good Catch for AND8 . the logic of BinOpDepth is come from function isSignOrZeroExtended(), when the instructions(e.g., AND, PHI, OR) which has two or more source registers , the instructions which defines these source registers also need to be promoted and so on , without increase the BinOpDepth , it maybe have 2 * 2 * 2 * 2.... recursions of promoteInstr32To64ForElimEXTSW.

// than once. This is done to prevent exponential recursion.
void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const {
if (!Reg.isVirtual())
return;

MachineInstr *MI = MRI->getVRegDef(Reg);
if (!MI)
return;

unsigned Opcode = MI->getOpcode();
bool IsNonSignedExtInstrNeedPromoted = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of having a flag here, maybe we can setup a static function instead to replace the lambda CheckAndSetNewOpcod

// switch stmt to check opcode and set to new if needed and return true or false 
static bool SignedExtInstrPromotion(unsigned int &Opcode) {
  switch (Opcode) {
    default: return false;
    case ....

 return true;
}

Copy link
Contributor Author

@diggerlin diggerlin Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of using static helper function and I do not want a duplication code, I use a Macro definition to achieve it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO the macro seem more confusing to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I change to another way

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This var is a bit of a mouth full and took me a few tries to understand what it means. Maybe a simpler var name: PromoteNonSignedExtInstr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it maybe better to change to HasNonSignedExtInstrPromoted

int NewOpcode = -1;

auto CheckAndSetNewOpcode = [&](int NewOpc) {
if (!IsNonSignedExtInstrNeedPromoted) {
NewOpcode = NewOpc;
IsNonSignedExtInstrNeedPromoted = true;
}
};

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: remove empty line

switch (Opcode) {
case PPC::OR:
CheckAndSetNewOpcode(PPC::OR8);
[[fallthrough]];
case PPC::ISEL:
CheckAndSetNewOpcode(PPC::ISEL8);
[[fallthrough]];
case PPC::OR8:
case PPC::PHI:
if (BinOpDepth < MAX_BINOP_DEPTH) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

early exit

Suggested change
if (BinOpDepth < MAX_BINOP_DEPTH) {
if (BinOpDepth >= MAX_BINOP_DEPTH)
break;

unsigned OperandEnd = 3, OperandStride = 1;
if (Opcode == PPC::PHI) {
OperandEnd = MI->getNumOperands();
OperandStride = 2;
}

for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
assert(MI->getOperand(I).isReg() && "Operand must be register");
Register SrcReg = MI->getOperand(I).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
}
}
break;
case PPC::COPY: {
// Refers to the logic of the `case PPC::COPY` statement in the function
// PPCInstrInfo::isSignOrZeroExtended().

Register SrcReg = MI->getOperand(1).getReg();
// In both ELFv1 and v2 ABI, method parameters and the return value
// are sign- or zero-extended.
const MachineFunction *MF = MI->getMF();
if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should copy the explanation comments for these conditions from issignorzeroextended() as well

// If this is a copy from another register, we recursively promote the
// source.
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
return;
}

// From here on everything is SVR4ABI. COPY will be eliminated in the other
// pass, we do not need promote the COPY pseudo opcode.

if (SrcReg != PPC::X3)
// If this is a copy from another register, we recursively promote the
// source.
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
return;
}
case PPC::ORI:
CheckAndSetNewOpcode(PPC::ORI8);
[[fallthrough]];
case PPC::XORI:
CheckAndSetNewOpcode(PPC::XORI8);
[[fallthrough]];
case PPC::ORIS:
CheckAndSetNewOpcode(PPC::ORIS8);
[[fallthrough]];
case PPC::XORIS:
CheckAndSetNewOpcode(PPC::XORIS8);
[[fallthrough]];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This big switch stmt makes things a bit confusing. See comment above about replacing the lambda func CheckAndSetNewOpcode with a static helper function.

case PPC::ORI8:
case PPC::XORI8:
case PPC::ORIS8:
case PPC::XORIS8: {
Register SrcReg = MI->getOperand(1).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need for tmp var. Can inline into call.

break;
}
case PPC::AND:
CheckAndSetNewOpcode(PPC::AND8);
[[fallthrough]];
case PPC::AND8: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

brace not needed here.

if (BinOpDepth < MAX_BINOP_DEPTH) {
Register SrcReg1 = MI->getOperand(1).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg1, MRI, BinOpDepth, LV);
Register SrcReg2 = MI->getOperand(2).getReg();
promoteInstr32To64ForElimEXTSW(SrcReg2, MRI, BinOpDepth, LV);
}
break;
}
default:
break;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed since it doesn't do anything.

}

const PPCInstrInfo *TII =
MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
if (TII->isSExt32To64(Opcode) || IsNonSignedExtInstrNeedPromoted) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opt for early exit

if (! TII->isSExt32To64(Opcode) && ! IsNonSignedExtInstrNeedPromoted) 
  return;


const TargetRegisterClass *RC = MRI->getRegClass(Reg);

if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
return;

// The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
// map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
// instruction with the same opcode.
if (!IsNonSignedExtInstrNeedPromoted)
NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);

assert(NewOpcode != -1 &&
"Must have a 64-bit opcode to map the 32-bit opcode!");

const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
const MCInstrDesc &MCID = TII->get(NewOpcode);
const TargetRegisterClass *NewRC =
TRI->getRegClass(MCID.operands()[0].RegClass);

Register SrcReg = MI->getOperand(0).getReg();
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

// If the register class of the defined register in the 32-bit instruction
// is the same as the register class of the defined register in the promoted
// 64-bit instruction, we do not need to promote the instruction.
if (NewRC == SrcRC)
return;

DebugLoc DL = MI->getDebugLoc();
auto MBB = MI->getParent();

// Since the pseudo-opcode of the instruction is promoted from 32-bit to
// 64-bit, if the source reg class of the original instruction belongs to
// PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
// the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
// respectively.
DenseMap<unsigned, Register> PromoteRegs;
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
MachineOperand &Operand = MI->getOperand(i);
if (Operand.isReg()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

early exit

Suggested change
if (Operand.isReg()) {
if (!Operand.isReg())
continue;

Register OperandReg = Operand.getReg();
if (!OperandReg.isVirtual())
continue;

const TargetRegisterClass *NewUsedRegRC =
TRI->getRegClass(MCID.operands()[i].RegClass);
const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
if (NewUsedRegRC != OrgRC &&
(OrgRC == &PPC::GPRCRegClass ||
OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
// Promote the used 32-bit register to 64-bit register.
Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
.addReg(TmpReg)
.addReg(OperandReg)
.addImm(PPC::sub_32);
PromoteRegs[i] = DstTmpReg;
}
}
}

Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the indentation from here to return is a bit off.


BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
MachineBasicBlock::instr_iterator Iter(MI);
--Iter;
MachineInstrBuilder MIBuilder(*Iter->getMF(), Iter);
for (unsigned i = 1; i < MI->getNumOperands(); i++) {
if (PromoteRegs.find(i) != PromoteRegs.end())
MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
else
Iter->addOperand(MI->getOperand(i));
}

for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
MachineOperand &Operand = Iter->getOperand(i);
if (Operand.isReg()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opt for early exit

Register OperandReg = Operand.getReg();
if (!OperandReg.isVirtual())
continue;
LV->recomputeForSingleDefVirtReg(OperandReg);
}
}

MI->eraseFromParent();

// A defined register may be used by other instructions that are 32-bit.
// After the defined register is promoted to 64-bit for the promoted
// instruction, we need to demote the 64-bit defined register back to a
// 32-bit register
BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
.addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
LV->recomputeForSingleDefVirtReg(NewDefinedReg);
}
return;
}

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

#define GET_INSTRINFO_HEADER
Expand Down Expand Up @@ -624,6 +625,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const MachineRegisterInfo *MRI) const {
return isSignOrZeroExtended(Reg, 0, MRI).second;
}
void promoteInstr32To64ForElimEXTSW(const Register &Reg,
MachineRegisterInfo *MRI,
unsigned BinOpDepth,
LiveVariables *LV) const;

bool convertToImmediateForm(MachineInstr &MI,
SmallSet<Register, 4> &RegsToUpdate,
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,16 @@ bool PPCMIPeephole::simplifyCode() {
} else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
// sign-extended. However, we are not sure whether a spill will occur
// during register allocation. If there is no promotion, it will use
// 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
// since the register class is 32-bits. Consequently, the high 32-bit
// information will be lost. Therefore, all these instructions in the
// chain used to deduce sign extension to eliminate the 'extsw' will
// need to be promoted to 64-bit pseudo instructions when the 'extsw'
// is eliminated.
TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);

LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
Register TmpReg =
MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCScheduleP7.td
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ let SchedModel = P7Model in {
RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAWI8, SRAWI8_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
)>;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ body: |
%2 = LI 48
%5 = COPY %0.sub_32
%8 = SRW killed %5, killed %2
; CHECK: LI 0
; CHECK: LI8 0
; CHECK-LATE: li 3, 0
$x3 = EXTSW_32_64 %8
BLR8 implicit $lr8, implicit $rm, implicit $x3
Expand Down Expand Up @@ -722,7 +722,7 @@ body: |
%3 = COPY %0.sub_32
%4 = SRAW killed %3, killed %2, implicit-def dead $carry
; CHECK: LI 48
; CHECK: SRAW killed %3, killed %2, implicit-def dead $carry
; CHECK: SRAW8 killed %7, killed %9, implicit-def $carry, implicit-def dead $carry
; CHECK-LATE: sraw 3, 3, 4
%5 = EXTSW_32_64 killed %4
$x3 = COPY %5
Expand Down Expand Up @@ -779,7 +779,7 @@ body: |
%2 = LI 80
%3 = COPY %0.sub_32
%4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
; CHECK: SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
; CHECK: SRAW8_rec killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
; CHECK-LATE: sraw. 3, 3, 4
%5 = COPY killed $cr0
%6 = ISEL %2, %4, %5.sub_eq
Expand Down
Loading