Skip to content

Commit 7f4a910

Browse files
authored
[RISCV] Add Zilsd/Zclsd support to RISCVMakeCompressible. (#140136)
1 parent 23a674d commit 7f4a910

File tree

2 files changed (+326, -2 lines changed)

llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,9 @@ static unsigned log2LdstWidth(unsigned Opcode) {
116116
case RISCV::FSW:
117117
return 2;
118118
case RISCV::LD:
119+
case RISCV::LD_RV32:
119120
case RISCV::SD:
121+
case RISCV::SD_RV32:
120122
case RISCV::FLD:
121123
case RISCV::FSD:
122124
return 3;
@@ -144,7 +146,9 @@ static unsigned offsetMask(unsigned Opcode) {
144146
case RISCV::FLW:
145147
case RISCV::FSW:
146148
case RISCV::LD:
149+
case RISCV::LD_RV32:
147150
case RISCV::SD:
151+
case RISCV::SD_RV32:
148152
case RISCV::FLD:
149153
case RISCV::FSD:
150154
return maskTrailingOnes<unsigned>(5U);
@@ -184,7 +188,8 @@ static bool isCompressedReg(Register Reg) {
184188
RISCV::GPRF16CRegClass.contains(Reg) ||
185189
RISCV::GPRF32CRegClass.contains(Reg) ||
186190
RISCV::FPR32CRegClass.contains(Reg) ||
187-
RISCV::FPR64CRegClass.contains(Reg);
191+
RISCV::FPR64CRegClass.contains(Reg) ||
192+
RISCV::GPRPairCRegClass.contains(Reg);
188193
}
189194

190195
// Return true if MI is a load for which there exists a compressed version.
@@ -203,6 +208,8 @@ static bool isCompressibleLoad(const MachineInstr &MI) {
203208
case RISCV::LW_INX:
204209
case RISCV::LD:
205210
return STI.hasStdExtCOrZca();
211+
case RISCV::LD_RV32:
212+
return STI.hasStdExtZclsd();
206213
case RISCV::FLW:
207214
return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
208215
case RISCV::FLD:
@@ -225,6 +232,8 @@ static bool isCompressibleStore(const MachineInstr &MI) {
225232
case RISCV::SW_INX:
226233
case RISCV::SD:
227234
return STI.hasStdExtCOrZca();
235+
case RISCV::SD_RV32:
236+
return STI.hasStdExtZclsd();
228237
case RISCV::FSW:
229238
return !STI.is64Bit() && STI.hasStdExtCOrZcfOrZce();
230239
case RISCV::FSD:
@@ -342,8 +351,10 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
342351
RCToScavenge = &RISCV::FPR32CRegClass;
343352
else if (RISCV::FPR64RegClass.contains(RegImm.Reg))
344353
RCToScavenge = &RISCV::FPR64CRegClass;
354+
else if (RISCV::GPRPairRegClass.contains(RegImm.Reg))
355+
RCToScavenge = &RISCV::GPRPairCRegClass;
345356
else
346-
return RISCV::NoRegister;
357+
return Register();
347358

348359
RegScavenger RS;
349360
RS.enterBasicBlockEnd(MBB);
@@ -400,6 +411,7 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
400411

401412
const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
402413
const RISCVInstrInfo &TII = *STI.getInstrInfo();
414+
const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
403415

404416
// This optimization only makes sense if compressed instructions are emitted.
405417
if (!STI.hasStdExtCOrZca())
@@ -438,7 +450,20 @@ bool RISCVMakeCompressibleOpt::runOnMachineFunction(MachineFunction &Fn) {
438450
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::PseudoMV_FPR32INX),
439451
NewReg)
440452
.addReg(RegImm.Reg);
453+
} else if (RISCV::GPRPairRegClass.contains(RegImm.Reg)) {
454+
assert(RegImm.Imm == 0);
455+
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI),
456+
TRI.getSubReg(NewReg, RISCV::sub_gpr_even))
457+
.addReg(TRI.getSubReg(RegImm.Reg, RISCV::sub_gpr_even))
458+
.addImm(0);
459+
BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI),
460+
TRI.getSubReg(NewReg, RISCV::sub_gpr_odd))
461+
.addReg(TRI.getSubReg(RegImm.Reg, RISCV::sub_gpr_odd))
462+
.addImm(0);
441463
} else {
464+
assert((RISCV::FPR32RegClass.contains(RegImm.Reg) ||
465+
RISCV::FPR64RegClass.contains(RegImm.Reg)) &&
466+
"Expected FP register class");
442467
// If we are looking at replacing an FPR register we don't expect to
443468
// have any offset. The only compressible FP instructions with an offset
444469
// are loads and stores, for which the offset applies to the GPR operand
Lines changed: 299 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,299 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -o - %s -mtriple=riscv32 -mattr=+zilsd,+zclsd,+zdinx -simplify-mir \
3+
# RUN: -run-pass=riscv-make-compressible | FileCheck --check-prefixes=RV32 %s
4+
--- |
5+
define void @store_common_value_double(ptr %a, ptr %b, ptr %c, i32 %d, double %e, double %f) #0 {
6+
entry:
7+
store double %f, ptr %a, align 8
8+
store double %f, ptr %b, align 8
9+
store double %f, ptr %c, align 8
10+
ret void
11+
}
12+
13+
define void @store_common_ptr_double(double %a, double %b, double %d, ptr %p) #0 {
14+
entry:
15+
store volatile double %a, ptr %p, align 8
16+
store volatile double %b, ptr %p, align 8
17+
store volatile double %b, ptr %p, align 8
18+
ret void
19+
}
20+
21+
define void @load_common_ptr_double(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 {
22+
entry:
23+
%0 = load double, ptr %g, align 8
24+
%arrayidx1 = getelementptr inbounds { double, double, i32 }, ptr %g, i32 0, i32 1
25+
%1 = load double, ptr %arrayidx1, align 8
26+
%arrayidx2 = getelementptr inbounds { double, double, i32 }, ptr %g, i32 0, i32 2
27+
%2 = load i32, ptr %arrayidx2, align 8
28+
tail call void @load_common_ptr_double_1(double %0, double %1, i32 %2)
29+
ret void
30+
}
31+
32+
; External callee tail-called by @load_common_ptr_double; the parameter list mirrors that call's (double, double, i32) arguments.
declare void @load_common_ptr_double_1(double, double, i32) #0
33+
34+
define void @store_large_offset_double(ptr %p, i32 %dummy, double %a, double %b, double %c) #0 {
35+
entry:
36+
%0 = getelementptr inbounds double, ptr %p, i32 100
37+
store volatile double %a, ptr %0, align 8
38+
%1 = getelementptr inbounds double, ptr %p, i32 101
39+
store volatile double %b, ptr %1, align 8
40+
%2 = getelementptr inbounds double, ptr %p, i32 102
41+
store volatile double %b, ptr %2, align 8
42+
ret void
43+
}
44+
45+
define void @load_large_offset_double(i32 %a, i32 %b, i32 %c, i32 %d, ptr %p) #0 {
46+
entry:
47+
%arrayidx = getelementptr inbounds { [102 x double], i32 }, ptr %p, i32 0, i32 0, i32 100
48+
%0 = load double, ptr %arrayidx, align 8
49+
%arrayidx1 = getelementptr inbounds { [102 x double], i32 }, ptr %p, i32 0, i32 0, i32 101
50+
%1 = load double, ptr %arrayidx1, align 8
51+
%arrayidx2 = getelementptr inbounds { [102 x double], i32 }, ptr %p, i32 0, i32 1
52+
%2 = load i32, ptr %arrayidx2, align 8
53+
tail call void @load_large_offset_double_1(double %0, double %1, i32 %2)
54+
ret void
55+
}
56+
57+
; External callee tail-called by @load_large_offset_double; the parameter list mirrors that call's (double, double, i32) arguments.
declare void @load_large_offset_double_1(double, double, i32) #0
58+
59+
define void @store_common_value_double_no_opt(ptr %a, i32 %b, double %c, double %d, double %e) #0 {
60+
entry:
61+
store double %e, ptr %a, align 8
62+
ret void
63+
}
64+
65+
define void @store_common_ptr_double_no_opt(double %a, i32 %b, i32 %c, i32 %d, i32 %e, ptr %p) #0 {
66+
entry:
67+
store volatile double %a, ptr %p, align 8
68+
ret void
69+
}
70+
71+
define double @load_common_ptr_double_no_opt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ptr %g) #0 {
72+
entry:
73+
%0 = load double, ptr %g, align 8
74+
ret double %0
75+
}
76+
77+
define void @store_large_offset_double_no_opt(ptr %p, double %a, double %b) #0 {
78+
entry:
79+
%0 = getelementptr inbounds double, ptr %p, i32 100
80+
store volatile double %a, ptr %0, align 8
81+
%1 = getelementptr inbounds double, ptr %p, i32 101
82+
store volatile double %b, ptr %1, align 8
83+
ret void
84+
}
85+
86+
define { double, double } @load_large_offset_double_no_opt(ptr %p) #0 {
87+
entry:
88+
%arrayidx = getelementptr inbounds double, ptr %p, i32 100
89+
%0 = load double, ptr %arrayidx, align 8
90+
%arrayidx1 = getelementptr inbounds double, ptr %p, i32 101
91+
%1 = load double, ptr %arrayidx1, align 8
92+
%2 = insertvalue { double, double } undef, double %0, 0
93+
%3 = insertvalue { double, double } %2, double %1, 1
94+
ret { double, double } %3
95+
}
96+
97+
attributes #0 = { minsize "target-features"="+zilsd,+zdinx" }
98+
...
99+
---
100+
name: store_common_value_double
101+
tracksRegLiveness: true
102+
body: |
103+
bb.0.entry:
104+
liveins: $x10, $x11, $x12, $x16, $x17
105+
106+
; RV32-LABEL: name: store_common_value_double
107+
; RV32: liveins: $x10, $x11, $x12, $x16, $x17
108+
; RV32-NEXT: {{ $}}
109+
; RV32-NEXT: $x14 = ADDI $x16, 0
110+
; RV32-NEXT: $x15 = ADDI $x17, 0
111+
; RV32-NEXT: SD_RV32 $x14_x15, killed renamable $x10, 0 :: (store (s64) into %ir.a)
112+
; RV32-NEXT: SD_RV32 $x14_x15, killed renamable $x11, 0 :: (store (s64) into %ir.b)
113+
; RV32-NEXT: SD_RV32 killed $x14_x15, killed renamable $x12, 0 :: (store (s64) into %ir.c)
114+
; RV32-NEXT: PseudoRET
115+
SD_RV32 renamable $x16_x17, killed renamable $x10, 0 :: (store (s64) into %ir.a)
116+
SD_RV32 renamable $x16_x17, killed renamable $x11, 0 :: (store (s64) into %ir.b)
117+
SD_RV32 killed renamable $x16_x17, killed renamable $x12, 0 :: (store (s64) into %ir.c)
118+
PseudoRET
119+
120+
...
121+
---
122+
name: store_common_ptr_double
123+
tracksRegLiveness: true
124+
body: |
125+
bb.0.entry:
126+
liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16
127+
128+
; RV32-LABEL: name: store_common_ptr_double
129+
; RV32: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16
130+
; RV32-NEXT: {{ $}}
131+
; RV32-NEXT: $x14 = ADDI $x16, 0
132+
; RV32-NEXT: SD_RV32 killed renamable $x10_x11, $x14, 0 :: (volatile store (s64) into %ir.p)
133+
; RV32-NEXT: SD_RV32 renamable $x12_x13, $x14, 0 :: (volatile store (s64) into %ir.p)
134+
; RV32-NEXT: SD_RV32 killed renamable $x12_x13, killed $x14, 0 :: (volatile store (s64) into %ir.p)
135+
; RV32-NEXT: PseudoRET
136+
SD_RV32 killed renamable $x10_x11, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
137+
SD_RV32 renamable $x12_x13, renamable $x16, 0 :: (volatile store (s64) into %ir.p)
138+
SD_RV32 killed renamable $x12_x13, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
139+
PseudoRET
140+
141+
...
142+
---
143+
name: load_common_ptr_double
144+
tracksRegLiveness: true
145+
body: |
146+
bb.0.entry:
147+
liveins: $x16
148+
149+
; RV32-LABEL: name: load_common_ptr_double
150+
; RV32: liveins: $x16
151+
; RV32-NEXT: {{ $}}
152+
; RV32-NEXT: $x15 = ADDI $x16, 0
153+
; RV32-NEXT: renamable $x10_x11 = LD_RV32 $x15, 0 :: (load (s64) from %ir.g)
154+
; RV32-NEXT: renamable $x12_x13 = LD_RV32 $x15, 8 :: (load (s64) from %ir.arrayidx1)
155+
; RV32-NEXT: renamable $x14 = LW killed $x15, 16 :: (load (s32) from %ir.arrayidx2, align 8)
156+
; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, csr_ilp32_lp64, implicit $x2, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14
157+
renamable $x10_x11 = LD_RV32 renamable $x16, 0 :: (load (s64) from %ir.g)
158+
renamable $x12_x13 = LD_RV32 renamable $x16, 8 :: (load (s64) from %ir.arrayidx1)
159+
renamable $x14 = LW killed renamable $x16, 16 :: (load (s32) from %ir.arrayidx2, align 8)
160+
PseudoTAIL target-flags(riscv-call) @load_common_ptr_double_1, csr_ilp32_lp64, implicit $x2, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14
161+
162+
...
163+
---
164+
name: store_large_offset_double
165+
tracksRegLiveness: true
166+
body: |
167+
bb.0.entry:
168+
liveins: $x10, $x12, $x13, $x14, $x15
169+
170+
; RV32-LABEL: name: store_large_offset_double
171+
; RV32: liveins: $x10, $x12, $x13, $x14, $x15
172+
; RV32-NEXT: {{ $}}
173+
; RV32-NEXT: $x11 = ADDI $x10, 768
174+
; RV32-NEXT: SD_RV32 killed renamable $x12_x13, $x11, 32 :: (volatile store (s64) into %ir.0)
175+
; RV32-NEXT: SD_RV32 renamable $x14_x15, $x11, 40 :: (volatile store (s64) into %ir.1)
176+
; RV32-NEXT: SD_RV32 killed renamable $x14_x15, killed $x11, 48 :: (volatile store (s64) into %ir.2)
177+
; RV32-NEXT: PseudoRET
178+
SD_RV32 killed renamable $x12_x13, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
179+
SD_RV32 renamable $x14_x15, renamable $x10, 808 :: (volatile store (s64) into %ir.1)
180+
SD_RV32 killed renamable $x14_x15, killed renamable $x10, 816 :: (volatile store (s64) into %ir.2)
181+
PseudoRET
182+
183+
...
184+
---
185+
name: load_large_offset_double
186+
tracksRegLiveness: true
187+
body: |
188+
bb.0.entry:
189+
liveins: $x14
190+
191+
; RV32-LABEL: name: load_large_offset_double
192+
; RV32: liveins: $x14
193+
; RV32-NEXT: {{ $}}
194+
; RV32-NEXT: $x15 = ADDI $x14, 768
195+
; RV32-NEXT: renamable $x10_x11 = LD_RV32 $x15, 32 :: (load (s64) from %ir.arrayidx)
196+
; RV32-NEXT: renamable $x12_x13 = LD_RV32 $x15, 40 :: (load (s64) from %ir.arrayidx1)
197+
; RV32-NEXT: renamable $x14 = LW killed $x15, 48 :: (load (s32) from %ir.arrayidx2, align 8)
198+
; RV32-NEXT: PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, csr_ilp32_lp64, implicit $x2, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14
199+
renamable $x10_x11 = LD_RV32 renamable $x14, 800 :: (load (s64) from %ir.arrayidx)
200+
renamable $x12_x13 = LD_RV32 renamable $x14, 808 :: (load (s64) from %ir.arrayidx1)
201+
renamable $x14 = LW killed renamable $x14, 816 :: (load (s32) from %ir.arrayidx2, align 8)
202+
PseudoTAIL target-flags(riscv-call) @load_large_offset_double_1, csr_ilp32_lp64, implicit $x2, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14
203+
...
204+
---
205+
name: store_common_value_double_no_opt
206+
tracksRegLiveness: true
207+
body: |
208+
bb.0.entry:
209+
liveins: $x10, $x16, $x17
210+
211+
; RV32-LABEL: name: store_common_value_double_no_opt
212+
; RV32: liveins: $x10, $x16, $x17
213+
; RV32-NEXT: {{ $}}
214+
; RV32-NEXT: SD_RV32 killed renamable $x16_x17, killed renamable $x10, 0 :: (store (s64) into %ir.a)
215+
; RV32-NEXT: PseudoRET
216+
SD_RV32 killed renamable $x16_x17, killed renamable $x10, 0 :: (store (s64) into %ir.a)
217+
PseudoRET
218+
219+
...
220+
---
221+
name: store_common_ptr_double_no_opt
222+
tracksRegLiveness: true
223+
body: |
224+
bb.0.entry:
225+
liveins: $x10, $x11, $x16
226+
227+
; RV32-LABEL: name: store_common_ptr_double_no_opt
228+
; RV32: liveins: $x10, $x11, $x16
229+
; RV32-NEXT: {{ $}}
230+
; RV32-NEXT: SD_RV32 killed renamable $x10_x11, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
231+
; RV32-NEXT: PseudoRET
232+
SD_RV32 killed renamable $x10_x11, killed renamable $x16, 0 :: (volatile store (s64) into %ir.p)
233+
PseudoRET
234+
235+
...
236+
---
237+
name: load_common_ptr_double_no_opt
238+
tracksRegLiveness: true
239+
body: |
240+
bb.0.entry:
241+
liveins: $x16
242+
243+
; RV32-LABEL: name: load_common_ptr_double_no_opt
244+
; RV32: liveins: $x16
245+
; RV32-NEXT: {{ $}}
246+
; RV32-NEXT: renamable $x10_x11 = LD_RV32 killed renamable $x16, 0 :: (load (s64) from %ir.g)
247+
; RV32-NEXT: PseudoRET implicit $x10, implicit $x11
248+
renamable $x10_x11 = LD_RV32 killed renamable $x16, 0 :: (load (s64) from %ir.g)
249+
PseudoRET implicit $x10, implicit $x11
250+
251+
...
252+
---
253+
name: store_large_offset_double_no_opt
254+
tracksRegLiveness: true
255+
body: |
256+
bb.0.entry:
257+
liveins: $x10, $x11, $x12, $x13, $x14
258+
259+
; RV32-LABEL: name: store_large_offset_double_no_opt
260+
; RV32: liveins: $x10, $x11, $x12, $x13, $x14
261+
; RV32-NEXT: {{ $}}
262+
; RV32-NEXT: $x15 = ADDI $x14, 0
263+
; RV32-NEXT: $x17 = ADDI $x12, 0
264+
; RV32-NEXT: $x16 = ADDI $x11, 0
265+
; RV32-NEXT: SD_RV32 killed renamable $x16_x17, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
266+
; RV32-NEXT: $x14 = ADDI $x13, 0
267+
; RV32-NEXT: SD_RV32 killed renamable $x14_x15, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1)
268+
; RV32-NEXT: PseudoRET
269+
$x15 = ADDI $x14, 0
270+
$x17 = ADDI $x12, 0
271+
$x16 = ADDI $x11, 0
272+
SD_RV32 killed renamable $x16_x17, renamable $x10, 800 :: (volatile store (s64) into %ir.0)
273+
$x14 = ADDI $x13, 0
274+
SD_RV32 killed renamable $x14_x15, killed renamable $x10, 808 :: (volatile store (s64) into %ir.1)
275+
PseudoRET
276+
277+
...
278+
---
279+
name: load_large_offset_double_no_opt
280+
tracksRegLiveness: true
281+
body: |
282+
bb.0.entry:
283+
liveins: $x10
284+
285+
; RV32-LABEL: name: load_large_offset_double_no_opt
286+
; RV32: liveins: $x10
287+
; RV32-NEXT: {{ $}}
288+
; RV32-NEXT: renamable $x14_x15 = LD_RV32 renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
289+
; RV32-NEXT: renamable $x12_x13 = LD_RV32 killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
290+
; RV32-NEXT: $x10 = ADDI renamable $x14, 0
291+
; RV32-NEXT: $x11 = ADDI killed renamable $x15, 0
292+
; RV32-NEXT: PseudoRET implicit $x10, implicit $x11, implicit $x12, implicit $x13
293+
renamable $x14_x15 = LD_RV32 renamable $x10, 800 :: (load (s64) from %ir.arrayidx)
294+
renamable $x12_x13 = LD_RV32 killed renamable $x10, 808 :: (load (s64) from %ir.arrayidx1)
295+
$x10 = ADDI renamable $x14, 0
296+
$x11 = ADDI killed renamable $x15, 0
297+
PseudoRET implicit $x10, implicit $x11, implicit $x12, implicit $x13
298+
299+
...

0 commit comments

Comments
 (0)