Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator &NextMBBI);
bool expandCCOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandCCOpToCMov(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
bool expandMV_FPR16INX(MachineBasicBlock &MBB,
Expand Down Expand Up @@ -178,6 +180,9 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
// First try expanding to a Conditional Move rather than a branch+mv
if (expandCCOpToCMov(MBB, MBBI))
return true;

MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
Expand Down Expand Up @@ -277,6 +282,86 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
return true;
}

bool RISCVExpandPseudo::expandCCOpToCMov(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();

if (MI.getOpcode() != RISCV::PseudoCCMOVGPR &&
MI.getOpcode() != RISCV::PseudoCCMOVGPRNoX0)
return false;

if (!STI->hasVendorXqcicm())
return false;

// FIXME: Would be wonderful to support LHS=X0, but not very easy.
if (MI.getOperand(1).getReg() == RISCV::X0 ||
MI.getOperand(4).getReg() == RISCV::X0 ||
MI.getOperand(5).getReg() == RISCV::X0)
return false;

auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());

unsigned CMovOpcode, CMovIOpcode;
switch (CC) {
default:
llvm_unreachable("Unhandled CC");
case RISCVCC::COND_EQ:
CMovOpcode = RISCV::QC_MVEQ;
CMovIOpcode = RISCV::QC_MVEQI;
break;
case RISCVCC::COND_NE:
CMovOpcode = RISCV::QC_MVNE;
CMovIOpcode = RISCV::QC_MVNEI;
break;
case RISCVCC::COND_LT:
CMovOpcode = RISCV::QC_MVLT;
CMovIOpcode = RISCV::QC_MVNEI;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be MVLTI?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, which shows my testing isn't good enough yet. Will look at adding additional relevant RUN lines

break;
case RISCVCC::COND_GE:
CMovOpcode = RISCV::QC_MVGE;
CMovIOpcode = RISCV::QC_MVGEI;
break;
case RISCVCC::COND_LTU:
CMovOpcode = RISCV::QC_MVLTU;
CMovIOpcode = RISCV::QC_MVLTUI;
break;
case RISCVCC::COND_GEU:
CMovOpcode = RISCV::QC_MVGEU;
CMovIOpcode = RISCV::QC_MVGEUI;
break;
}

if (MI.getOperand(2).getReg() == RISCV::X0) {
// $dst = PseudoCCMOVGPR $lhs, X0, $cc, $falsev (=$dst), $truev
// $dst = PseudoCCMOVGPRNoX0 $lhs, X0, $cc, $falsev (=$dst), $truev
// =>
// $dst = QC_MVccI $falsev (=$dst), $lhs, 0, $truev
BuildMI(MBB, MBBI, DL, TII->get(CMovIOpcode))
.addDef(MI.getOperand(0).getReg())
.addReg(MI.getOperand(4).getReg())
.addReg(MI.getOperand(1).getReg())
.addImm(0)
.addReg(MI.getOperand(5).getReg());

MI.eraseFromParent();
return true;
}

// $dst = PseudoCCMOVGPR $lhs, $rhs, $cc, $falsev (=$dst), $truev
// $dst = PseudoCCMOVGPRNoX0 $lhs, $rhs, $cc, $falsev (=$dst), $truev
// =>
// $dst = QC_MVcc $falsev (=$dst), $lhs, $rhs, $truev
BuildMI(MBB, MBBI, DL, TII->get(CMovOpcode))
.addDef(MI.getOperand(0).getReg())
.addReg(MI.getOperand(4).getReg())
.addReg(MI.getOperand(1).getReg())
.addReg(MI.getOperand(2).getReg())
.addReg(MI.getOperand(5).getReg());
MI.eraseFromParent();
return true;
}

bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Opcode) {
Expand Down
37 changes: 23 additions & 14 deletions llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,10 @@ class QCIMVCCIPat<CondCode Cond, QCIMVCCI Inst, DAGOperand InTyImm>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), InTyImm:$imm, Cond, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(Inst GPRNoX0:$rd, GPRNoX0:$rs1, InTyImm:$imm, GPRNoX0:$rs3)>;

class QCIMVCCIZeroPat<CondCode Cond, QCIMVCCI Inst>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), Cond, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(Inst GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;

class QCISELECTCCIPat<CondCode Cond, QCISELECTCCI Inst>
: Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rd), simm5:$imm, Cond, (i32 GPRNoX0:$rs2), (i32 GPRNoX0:$rs3))),
(Inst GPRNoX0:$rd, simm5:$imm, GPRNoX0:$rs2, GPRNoX0:$rs3)>;
Expand Down Expand Up @@ -1538,27 +1542,32 @@ def: Pat<(i32 (ctlz (not (i32 GPR:$rs1)))), (QC_CLO GPR:$rs1)>;
let Predicates = [HasVendorXqciint, IsRV32] in
def : Pat<(riscv_mileaveret_glue), (QC_C_MILEAVERET)>;

let Predicates = [HasVendorXqcicm, IsRV32] in {
// (SELECT X, Y, Z) is canonicalised to `(riscv_selectcc x, 0, NE, y, z)`.
// This exists to prioritise over the `Select_GPR_Using_CC_GPR` pattern.
def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETNE, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(QC_MVNEI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETEQ, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(QC_MVEQI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;

let Predicates = [HasVendorXqcicm, NoShortForwardBranchOpt, IsRV32] in {
def : QCIMVCCPat<SETEQ, QC_MVEQ>;
def : QCIMVCCPat<SETNE, QC_MVNE>;
def : QCIMVCCPat<SETLT, QC_MVLT>;
def : QCIMVCCPat<SETULT, QC_MVLTU>;
def : QCIMVCCPat<SETGE, QC_MVGE>;
def : QCIMVCCPat<SETUGE, QC_MVGEU>;

def : QCIMVCCIPat<SETEQ, QC_MVEQI, simm5>;
def : QCIMVCCIPat<SETNE, QC_MVNEI, simm5>;
def : QCIMVCCIPat<SETLT, QC_MVLTI, simm5>;
def : QCIMVCCIPat<SETULT, QC_MVLTUI, uimm5>;
def : QCIMVCCIPat<SETGE, QC_MVGEI, simm5>;
def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5>;
// These exist to prioritise over the `Select_GPR_Using_CC_GPR` pattern for X0.
def : QCIMVCCIZeroPat<SETEQ, QC_MVEQI>;
def : QCIMVCCIZeroPat<SETNE, QC_MVNEI>;
def : QCIMVCCIZeroPat<SETLT, QC_MVLTI>;
def : QCIMVCCIZeroPat<SETULT, QC_MVLTUI>;
def : QCIMVCCIZeroPat<SETGE, QC_MVGEI>;
def : QCIMVCCIZeroPat<SETUGE, QC_MVGEUI>;
}

let Predicates = [HasVendorXqcicm, IsRV32] in {
// These all use *imm5nonzero because we want to use PseudoCCMOVGPR with X0 when SFB is enabled.
// When SFB is not enabled, the `QCIMVCCIZeroPat`s above will be used if RHS=0.
def : QCIMVCCIPat<SETEQ, QC_MVEQI, simm5nonzero>;
def : QCIMVCCIPat<SETNE, QC_MVNEI, simm5nonzero>;
def : QCIMVCCIPat<SETLT, QC_MVLTI, simm5nonzero>;
def : QCIMVCCIPat<SETULT, QC_MVLTUI, uimm5nonzero>;
def : QCIMVCCIPat<SETGE, QC_MVGEI, simm5nonzero>;
def : QCIMVCCIPat<SETUGE, QC_MVGEUI, uimm5nonzero>;
}

let Predicates = [HasVendorXqcicli, IsRV32] in {
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/select-bare.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI

define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind {
Expand Down
59 changes: 29 additions & 30 deletions llvm/test/CodeGen/RISCV/select-cc.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -disable-block-placement -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI
; RUN: llc -mtriple=riscv64 -disable-block-placement -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64I %s
Expand Down Expand Up @@ -88,39 +88,38 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind {
; RV32IXQCI-LABEL: foo:
; RV32IXQCI: # %bb.0:
; RV32IXQCI-NEXT: lw a2, 0(a1)
; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: lw t5, 0(a1)
; RV32IXQCI-NEXT: lw t4, 0(a1)
; RV32IXQCI-NEXT: lw t3, 0(a1)
; RV32IXQCI-NEXT: lw t2, 0(a1)
; RV32IXQCI-NEXT: lw t0, 0(a1)
; RV32IXQCI-NEXT: lw a7, 0(a1)
; RV32IXQCI-NEXT: lw a6, 0(a1)
; RV32IXQCI-NEXT: lw a3, 0(a1)
; RV32IXQCI-NEXT: lw t1, 0(a1)
; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: lw a5, 0(a1)
; RV32IXQCI-NEXT: bltz t1, .LBB0_2
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was there a branch here before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Broadly because t111: i32 = RISCVISD::SELECT_CC t47, Constant:i32<0>, setge:ch, t112, t51 was getting selected into t111: i32 = Select_GPR_Using_CC_GPR t47, Register:i32 $x0, TargetConstant:i32<3> /*==RISCVCC::COND_GE*/, t112, t51.

This is due to the pattern here:

def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond, (vt valty:$truev),
valty:$falsev),
(!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, (XLenVT X0),
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
which has Complexity=8.

There was only the generic QC_MVGE pattern for SETGE, which has complexity=3. We did have some Complexity=8 patterns with 0 literals (for QC_MVccI, but only for SETEQ/SETNE:

def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETNE, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(QC_MVNEI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;
def : Pat<(i32 (riscv_selectcc (i32 GPRNoX0:$rs1), (i32 0), SETEQ, (i32 GPRNoX0:$rs3), (i32 GPRNoX0:$rd))),
(QC_MVEQI GPRNoX0:$rd, GPRNoX0:$rs1, 0, GPRNoX0:$rs3)>;

This PR wrapped up the latter patterns into QCIMVCCIZeroPat, and added variants for all the condition codes supported.

These two patterns being almost identical and having the same complexity is not a problem, because the Select_GPR_Using_CC_GPR has a custom inserter, which adds to the cost in getResultPatternCost as used by the PatternSortingPredicate, so it will always come after a single-instruction result with no custom inserter, as is present in QCIMVCCIZeroPat.

I've been trying to work through internally to make sure that all our patterns are totally ordered with PatternSortingPredicate (when the patterns are enabled). We have one last place where there's an overlap and that sorting predicate is not able to differentiate the patterns, which we know we need to solve.

; RV32IXQCI-NEXT: qc.mvne a0, a0, a2, a2
; RV32IXQCI-NEXT: qc.mveq a0, a0, a3, a3
; RV32IXQCI-NEXT: lw a2, 0(a1)
; RV32IXQCI-NEXT: qc.mvgeu a0, a4, a0, a4
; RV32IXQCI-NEXT: lw a3, 0(a1)
; RV32IXQCI-NEXT: qc.mvltu a0, a0, a5, a5
; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: qc.mvgeu a0, a0, a2, a2
; RV32IXQCI-NEXT: lw a2, 0(a1)
; RV32IXQCI-NEXT: qc.mvltu a0, a3, a0, a3
; RV32IXQCI-NEXT: lw a3, 0(a1)
; RV32IXQCI-NEXT: qc.mvge a0, a4, a0, a4
; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: qc.mvlt a0, a0, a2, a2
; RV32IXQCI-NEXT: lw a2, 0(a1)
; RV32IXQCI-NEXT: qc.mvge a0, a0, a3, a3
; RV32IXQCI-NEXT: lw a3, 0(a1)
; RV32IXQCI-NEXT: qc.mvlt a0, a4, a0, a4
; RV32IXQCI-NEXT: lw a4, 0(a1)
; RV32IXQCI-NEXT: lw a1, 0(a1)
; RV32IXQCI-NEXT: blez a2, .LBB0_2
; RV32IXQCI-NEXT: # %bb.1:
; RV32IXQCI-NEXT: li a5, 0
; RV32IXQCI-NEXT: qc.mveq a2, a0, a2, a0
; RV32IXQCI-NEXT: qc.mvne a4, a2, a4, a2
; RV32IXQCI-NEXT: qc.mvltu t5, t5, a4, a4
; RV32IXQCI-NEXT: qc.mvgeu t4, t5, t4, t5
; RV32IXQCI-NEXT: qc.mvltu t3, t4, t3, t4
; RV32IXQCI-NEXT: qc.mvgeu t2, t2, t3, t3
; RV32IXQCI-NEXT: qc.mvlt t0, t0, t2, t2
; RV32IXQCI-NEXT: qc.mvge a7, t0, a7, t0
; RV32IXQCI-NEXT: qc.mvlt a6, a7, a6, a7
; RV32IXQCI-NEXT: qc.mvge a3, a3, a6, a6
; RV32IXQCI-NEXT: qc.mvlt a3, a5, t1, t1
; RV32IXQCI-NEXT: mv a5, a3
; RV32IXQCI-NEXT: mv a0, a2
; RV32IXQCI-NEXT: .LBB0_2:
; RV32IXQCI-NEXT: lw a2, 0(a1)
; RV32IXQCI-NEXT: lw a0, 0(a1)
; RV32IXQCI-NEXT: li a1, 1024
; RV32IXQCI-NEXT: qc.mvlt a2, a1, a2, a5
; RV32IXQCI-NEXT: li a1, 2046
; RV32IXQCI-NEXT: qc.mvltu a0, a1, t1, a2
; RV32IXQCI-NEXT: qc.mvnei a0, a2, 0, a3
; RV32IXQCI-NEXT: li a3, 1024
; RV32IXQCI-NEXT: qc.mvge a0, a3, a4, a4
; RV32IXQCI-NEXT: li a3, 2046
; RV32IXQCI-NEXT: qc.mvgeu a0, a3, a2, a1
; RV32IXQCI-NEXT: ret
;
; RV64I-LABEL: foo:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/select-cond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
; RUN: | FileCheck %s --check-prefixes=RV32-XQCICM
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcics -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32-XQCICS
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli -verify-machineinstrs < %s \
; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV32IXQCI
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=RV64
Expand Down
Loading