Skip to content

Commit 04619db

Browse files
authored
[RISCV] Add short forward branch support for mul instruction (#166300)
1 parent 546a783 commit 04619db

File tree

6 files changed

+169
-0
lines changed

6 files changed

+169
-0
lines changed

llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
131131
case RISCV::PseudoCCMAXU:
132132
case RISCV::PseudoCCMIN:
133133
case RISCV::PseudoCCMINU:
134+
case RISCV::PseudoCCMUL:
134135
case RISCV::PseudoCCADDW:
135136
case RISCV::PseudoCCSUBW:
136137
case RISCV::PseudoCCSLL:
@@ -237,6 +238,7 @@ bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
237238
case RISCV::PseudoCCMIN: NewOpc = RISCV::MIN; break;
238239
case RISCV::PseudoCCMAXU: NewOpc = RISCV::MAXU; break;
239240
case RISCV::PseudoCCMINU: NewOpc = RISCV::MINU; break;
241+
case RISCV::PseudoCCMUL: NewOpc = RISCV::MUL; break;
240242
case RISCV::PseudoCCADDI: NewOpc = RISCV::ADDI; break;
241243
case RISCV::PseudoCCSLLI: NewOpc = RISCV::SLLI; break;
242244
case RISCV::PseudoCCSRLI: NewOpc = RISCV::SRLI; break;

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1856,6 +1856,11 @@ def TuneShortForwardBranchIMinMax
18561856
"true", "Enable short forward branch optimization for min,max instructions in Zbb",
18571857
[TuneShortForwardBranchOpt]>;
18581858

1859+
def TuneShortForwardBranchIMul
1860+
: SubtargetFeature<"short-forward-branch-i-mul", "HasShortForwardBranchIMul",
1861+
"true", "Enable short forward branch optimization for mul instruction",
1862+
[TuneShortForwardBranchOpt]>;
1863+
18591864
// Some subtargets require a S2V transfer buffer to move scalars into vectors.
18601865
// FIXME: Forming .vx/.vf/.wx/.wf can reduce register pressure.
18611866
def TuneNoSinkSplatOperands

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1703,6 +1703,7 @@ unsigned getPredicatedOpcode(unsigned Opcode) {
17031703
case RISCV::MAXU: return RISCV::PseudoCCMAXU;
17041704
case RISCV::MIN: return RISCV::PseudoCCMIN;
17051705
case RISCV::MINU: return RISCV::PseudoCCMINU;
1706+
case RISCV::MUL: return RISCV::PseudoCCMUL;
17061707

17071708
case RISCV::ADDI: return RISCV::PseudoCCADDI;
17081709
case RISCV::SLLI: return RISCV::PseudoCCSLLI;
@@ -1754,6 +1755,9 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
17541755
MI->getOpcode() == RISCV::MINU || MI->getOpcode() == RISCV::MAXU))
17551756
return nullptr;
17561757

1758+
if (!STI.hasShortForwardBranchIMul() && MI->getOpcode() == RISCV::MUL)
1759+
return nullptr;
1760+
17571761
// Check if MI can be predicated and folded into the CCMOV.
17581762
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
17591763
return nullptr;

llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,7 @@ def PseudoCCMAX : SFBALU_rr;
110110
def PseudoCCMIN : SFBALU_rr;
111111
def PseudoCCMAXU : SFBALU_rr;
112112
def PseudoCCMINU : SFBALU_rr;
113+
def PseudoCCMUL : SFBALU_rr;
113114

114115
def PseudoCCADDI : SFBALU_ri;
115116
def PseudoCCANDI : SFBALU_ri;

llvm/test/CodeGen/RISCV/features-info.ll

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,7 @@
137137
; CHECK-NEXT: shifted-zextw-fusion - Enable SLLI+SRLI to be fused when computing (shifted) word zero extension.
138138
; CHECK-NEXT: shlcofideleg - 'Shlcofideleg' (Delegating LCOFI Interrupts to VS-mode).
139139
; CHECK-NEXT: short-forward-branch-i-minmax - Enable short forward branch optimization for min,max instructions in Zbb.
140+
; CHECK-NEXT: short-forward-branch-i-mul - Enable short forward branch optimization for mul instruction.
140141
; CHECK-NEXT: short-forward-branch-opt - Enable short forward branch optimization.
141142
; CHECK-NEXT: shtvala - 'Shtvala' (htval provides all needed values).
142143
; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
Lines changed: 156 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,156 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RV32I-M
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RV64I-M
4+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+short-forward-branch-opt | \
5+
; RUN: FileCheck %s --check-prefixes=RV32I-SFB-M
6+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+short-forward-branch-opt | \
7+
; RUN: FileCheck %s --check-prefixes=RV64I-SFB-M
8+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+short-forward-branch-i-mul | \
9+
; RUN: FileCheck %s --check-prefixes=RV32I-SFBIMul-M
10+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+short-forward-branch-i-mul | \
11+
; RUN: FileCheck %s --check-prefixes=RV64I-SFBIMul-M
12+
13+
define i32 @select_example_mul_i32(i32 %a, i32 %b, i1 zeroext %x, i32 %y) {
14+
; RV32I-M-LABEL: select_example_mul_i32:
15+
; RV32I-M: # %bb.0: # %entry
16+
; RV32I-M-NEXT: beqz a2, .LBB0_2
17+
; RV32I-M-NEXT: # %bb.1:
18+
; RV32I-M-NEXT: mul a1, a0, a3
19+
; RV32I-M-NEXT: .LBB0_2: # %entry
20+
; RV32I-M-NEXT: mv a0, a1
21+
; RV32I-M-NEXT: ret
22+
;
23+
; RV64I-M-LABEL: select_example_mul_i32:
24+
; RV64I-M: # %bb.0: # %entry
25+
; RV64I-M-NEXT: beqz a2, .LBB0_2
26+
; RV64I-M-NEXT: # %bb.1:
27+
; RV64I-M-NEXT: mulw a1, a0, a3
28+
; RV64I-M-NEXT: .LBB0_2: # %entry
29+
; RV64I-M-NEXT: mv a0, a1
30+
; RV64I-M-NEXT: ret
31+
;
32+
; RV32I-SFB-M-LABEL: select_example_mul_i32:
33+
; RV32I-SFB-M: # %bb.0: # %entry
34+
; RV32I-SFB-M-NEXT: mul a0, a0, a3
35+
; RV32I-SFB-M-NEXT: bnez a2, .LBB0_2
36+
; RV32I-SFB-M-NEXT: # %bb.1: # %entry
37+
; RV32I-SFB-M-NEXT: mv a0, a1
38+
; RV32I-SFB-M-NEXT: .LBB0_2: # %entry
39+
; RV32I-SFB-M-NEXT: ret
40+
;
41+
; RV64I-SFB-M-LABEL: select_example_mul_i32:
42+
; RV64I-SFB-M: # %bb.0: # %entry
43+
; RV64I-SFB-M-NEXT: mulw a0, a0, a3
44+
; RV64I-SFB-M-NEXT: bnez a2, .LBB0_2
45+
; RV64I-SFB-M-NEXT: # %bb.1: # %entry
46+
; RV64I-SFB-M-NEXT: mv a0, a1
47+
; RV64I-SFB-M-NEXT: .LBB0_2: # %entry
48+
; RV64I-SFB-M-NEXT: ret
49+
;
50+
; RV32I-SFBIMul-M-LABEL: select_example_mul_i32:
51+
; RV32I-SFBIMul-M: # %bb.0: # %entry
52+
; RV32I-SFBIMul-M-NEXT: beqz a2, .LBB0_2
53+
; RV32I-SFBIMul-M-NEXT: # %bb.1: # %entry
54+
; RV32I-SFBIMul-M-NEXT: mul a1, a0, a3
55+
; RV32I-SFBIMul-M-NEXT: .LBB0_2: # %entry
56+
; RV32I-SFBIMul-M-NEXT: mv a0, a1
57+
; RV32I-SFBIMul-M-NEXT: ret
58+
;
59+
; RV64I-SFBIMul-M-LABEL: select_example_mul_i32:
60+
; RV64I-SFBIMul-M: # %bb.0: # %entry
61+
; RV64I-SFBIMul-M-NEXT: mulw a0, a0, a3
62+
; RV64I-SFBIMul-M-NEXT: bnez a2, .LBB0_2
63+
; RV64I-SFBIMul-M-NEXT: # %bb.1: # %entry
64+
; RV64I-SFBIMul-M-NEXT: mv a0, a1
65+
; RV64I-SFBIMul-M-NEXT: .LBB0_2: # %entry
66+
; RV64I-SFBIMul-M-NEXT: ret
67+
entry:
68+
%res = mul i32 %a, %y
69+
%sel = select i1 %x, i32 %res, i32 %b
70+
ret i32 %sel
71+
}
72+
73+
define i64 @select_example_mul_i64(i64 %a, i64 %b, i1 zeroext %x, i64 %y) {
74+
; RV32I-M-LABEL: select_example_mul_i64:
75+
; RV32I-M: # %bb.0: # %entry
76+
; RV32I-M-NEXT: beqz a4, .LBB1_2
77+
; RV32I-M-NEXT: # %bb.1:
78+
; RV32I-M-NEXT: mul a2, a0, a6
79+
; RV32I-M-NEXT: mulhu a3, a0, a5
80+
; RV32I-M-NEXT: mul a1, a1, a5
81+
; RV32I-M-NEXT: add a2, a3, a2
82+
; RV32I-M-NEXT: add a3, a2, a1
83+
; RV32I-M-NEXT: mul a2, a0, a5
84+
; RV32I-M-NEXT: .LBB1_2: # %entry
85+
; RV32I-M-NEXT: mv a0, a2
86+
; RV32I-M-NEXT: mv a1, a3
87+
; RV32I-M-NEXT: ret
88+
;
89+
; RV64I-M-LABEL: select_example_mul_i64:
90+
; RV64I-M: # %bb.0: # %entry
91+
; RV64I-M-NEXT: beqz a2, .LBB1_2
92+
; RV64I-M-NEXT: # %bb.1:
93+
; RV64I-M-NEXT: mul a1, a0, a3
94+
; RV64I-M-NEXT: .LBB1_2: # %entry
95+
; RV64I-M-NEXT: mv a0, a1
96+
; RV64I-M-NEXT: ret
97+
;
98+
; RV32I-SFB-M-LABEL: select_example_mul_i64:
99+
; RV32I-SFB-M: # %bb.0: # %entry
100+
; RV32I-SFB-M-NEXT: mul a6, a0, a6
101+
; RV32I-SFB-M-NEXT: mulhu a7, a0, a5
102+
; RV32I-SFB-M-NEXT: mul a1, a1, a5
103+
; RV32I-SFB-M-NEXT: mul a0, a0, a5
104+
; RV32I-SFB-M-NEXT: add a6, a7, a6
105+
; RV32I-SFB-M-NEXT: beqz a4, .LBB1_2
106+
; RV32I-SFB-M-NEXT: # %bb.1: # %entry
107+
; RV32I-SFB-M-NEXT: add a3, a6, a1
108+
; RV32I-SFB-M-NEXT: .LBB1_2: # %entry
109+
; RV32I-SFB-M-NEXT: bnez a4, .LBB1_4
110+
; RV32I-SFB-M-NEXT: # %bb.3: # %entry
111+
; RV32I-SFB-M-NEXT: mv a0, a2
112+
; RV32I-SFB-M-NEXT: .LBB1_4: # %entry
113+
; RV32I-SFB-M-NEXT: mv a1, a3
114+
; RV32I-SFB-M-NEXT: ret
115+
;
116+
; RV64I-SFB-M-LABEL: select_example_mul_i64:
117+
; RV64I-SFB-M: # %bb.0: # %entry
118+
; RV64I-SFB-M-NEXT: mul a0, a0, a3
119+
; RV64I-SFB-M-NEXT: bnez a2, .LBB1_2
120+
; RV64I-SFB-M-NEXT: # %bb.1: # %entry
121+
; RV64I-SFB-M-NEXT: mv a0, a1
122+
; RV64I-SFB-M-NEXT: .LBB1_2: # %entry
123+
; RV64I-SFB-M-NEXT: ret
124+
;
125+
; RV32I-SFBIMul-M-LABEL: select_example_mul_i64:
126+
; RV32I-SFBIMul-M: # %bb.0: # %entry
127+
; RV32I-SFBIMul-M-NEXT: mul a6, a0, a6
128+
; RV32I-SFBIMul-M-NEXT: mulhu a7, a0, a5
129+
; RV32I-SFBIMul-M-NEXT: mul a1, a1, a5
130+
; RV32I-SFBIMul-M-NEXT: add a6, a7, a6
131+
; RV32I-SFBIMul-M-NEXT: beqz a4, .LBB1_2
132+
; RV32I-SFBIMul-M-NEXT: # %bb.1: # %entry
133+
; RV32I-SFBIMul-M-NEXT: add a3, a6, a1
134+
; RV32I-SFBIMul-M-NEXT: .LBB1_2: # %entry
135+
; RV32I-SFBIMul-M-NEXT: beqz a4, .LBB1_4
136+
; RV32I-SFBIMul-M-NEXT: # %bb.3: # %entry
137+
; RV32I-SFBIMul-M-NEXT: mul a2, a0, a5
138+
; RV32I-SFBIMul-M-NEXT: .LBB1_4: # %entry
139+
; RV32I-SFBIMul-M-NEXT: mv a0, a2
140+
; RV32I-SFBIMul-M-NEXT: mv a1, a3
141+
; RV32I-SFBIMul-M-NEXT: ret
142+
;
143+
; RV64I-SFBIMul-M-LABEL: select_example_mul_i64:
144+
; RV64I-SFBIMul-M: # %bb.0: # %entry
145+
; RV64I-SFBIMul-M-NEXT: beqz a2, .LBB1_2
146+
; RV64I-SFBIMul-M-NEXT: # %bb.1: # %entry
147+
; RV64I-SFBIMul-M-NEXT: mul a1, a0, a3
148+
; RV64I-SFBIMul-M-NEXT: .LBB1_2: # %entry
149+
; RV64I-SFBIMul-M-NEXT: mv a0, a1
150+
; RV64I-SFBIMul-M-NEXT: ret
151+
entry:
152+
%res = mul i64 %a, %y
153+
%sel = select i1 %x, i64 %res, i64 %b
154+
ret i64 %sel
155+
}
156+

0 commit comments

Comments
 (0)