Skip to content

Commit f8df240

Browse files
authored
[RISCV] Don't commute with shift if XAndesPerf is enabled (llvm#142920)
With this change, more nds.lea.{h,w,d} instructions are generated, similar to how sh{1,2,3}add is preserved when Zba is enabled.
1 parent 05c12b2 commit f8df240

File tree

3 files changed: +176 -2 lines changed

3 files changed

+176
-2
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20413,8 +20413,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
2041320413
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
2041420414

2041520415
// Bail if we might break a sh{1,2,3}add pattern.
20416-
if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
20417-
C2->getZExtValue() <= 3 && N->hasOneUse() &&
20416+
if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
20417+
C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
2041820418
N->user_begin()->getOpcode() == ISD::ADD &&
2041920419
!isUsedByLdSt(*N->user_begin(), nullptr) &&
2042020420
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))

llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll

Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
22
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefixes=RV64,NO-ZBA %s
33
; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck -check-prefixes=RV64,ZBA %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+xandesperf < %s | FileCheck -check-prefixes=RV64,XANDESPERF %s
45

56
define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
67
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add:
@@ -23,6 +24,16 @@ define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
2324
; ZBA-NEXT: sw a2, 24(a0)
2425
; ZBA-NEXT: sw a3, 140(a0)
2526
; ZBA-NEXT: ret
27+
;
28+
; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add:
29+
; XANDESPERF: # %bb.0: # %entry
30+
; XANDESPERF-NEXT: addi a3, a1, 5
31+
; XANDESPERF-NEXT: sext.w a1, a1
32+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
33+
; XANDESPERF-NEXT: sw a2, 20(a0)
34+
; XANDESPERF-NEXT: sw a2, 24(a0)
35+
; XANDESPERF-NEXT: sw a3, 140(a0)
36+
; XANDESPERF-NEXT: ret
2637
entry:
2738
%add = add nsw i32 %a, 5
2839
%idxprom = sext i32 %add to i64
@@ -66,6 +77,19 @@ define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b
6677
; ZBA-NEXT: sw a3, 4(a0)
6778
; ZBA-NEXT: sw a2, 120(a0)
6879
; ZBA-NEXT: ret
80+
;
81+
; XANDESPERF-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
82+
; XANDESPERF: # %bb.0: # %entry
83+
; XANDESPERF-NEXT: addi a3, a1, 2047
84+
; XANDESPERF-NEXT: lui a4, 2
85+
; XANDESPERF-NEXT: sext.w a1, a1
86+
; XANDESPERF-NEXT: addi a3, a3, 1
87+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
88+
; XANDESPERF-NEXT: add a0, a0, a4
89+
; XANDESPERF-NEXT: sw a2, 0(a0)
90+
; XANDESPERF-NEXT: sw a3, 4(a0)
91+
; XANDESPERF-NEXT: sw a2, 120(a0)
92+
; XANDESPERF-NEXT: ret
6993
entry:
7094
%add = add nsw i32 %a, 2048
7195
%idxprom = sext i32 %add to i64
@@ -101,6 +125,16 @@ define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) {
101125
; ZBA-NEXT: sw a2, 24(a0)
102126
; ZBA-NEXT: sd a3, 140(a0)
103127
; ZBA-NEXT: ret
128+
;
129+
; XANDESPERF-LABEL: add_sext_shl_moreOneUse_sext:
130+
; XANDESPERF: # %bb.0: # %entry
131+
; XANDESPERF-NEXT: sext.w a1, a1
132+
; XANDESPERF-NEXT: addi a3, a1, 5
133+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
134+
; XANDESPERF-NEXT: sw a2, 20(a0)
135+
; XANDESPERF-NEXT: sw a2, 24(a0)
136+
; XANDESPERF-NEXT: sd a3, 140(a0)
137+
; XANDESPERF-NEXT: ret
104138
entry:
105139
%add = add nsw i32 %a, 5
106140
%idxprom = sext i32 %add to i64
@@ -147,6 +181,20 @@ define void @add_sext_shl_moreOneUse_add_inSelect(ptr %array1, i32 signext %a,
147181
; ZBA-NEXT: sw a5, 24(a0)
148182
; ZBA-NEXT: sw a4, 140(a0)
149183
; ZBA-NEXT: ret
184+
;
185+
; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect:
186+
; XANDESPERF: # %bb.0: # %entry
187+
; XANDESPERF-NEXT: addi a4, a1, 5
188+
; XANDESPERF-NEXT: mv a5, a4
189+
; XANDESPERF-NEXT: bgtz a3, .LBB3_2
190+
; XANDESPERF-NEXT: # %bb.1: # %entry
191+
; XANDESPERF-NEXT: mv a5, a2
192+
; XANDESPERF-NEXT: .LBB3_2: # %entry
193+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
194+
; XANDESPERF-NEXT: sw a5, 20(a0)
195+
; XANDESPERF-NEXT: sw a5, 24(a0)
196+
; XANDESPERF-NEXT: sw a4, 140(a0)
197+
; XANDESPERF-NEXT: ret
150198
entry:
151199
%add = add nsw i32 %a, 5
152200
%cmp = icmp sgt i32 %x, 0
@@ -200,6 +248,23 @@ define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i
200248
; ZBA-NEXT: sw a1, 4(a0)
201249
; ZBA-NEXT: sw a4, 120(a0)
202250
; ZBA-NEXT: ret
251+
;
252+
; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
253+
; XANDESPERF: # %bb.0: # %entry
254+
; XANDESPERF-NEXT: addi a4, a1, 2047
255+
; XANDESPERF-NEXT: lui a5, 2
256+
; XANDESPERF-NEXT: addi a4, a4, 1
257+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
258+
; XANDESPERF-NEXT: add a0, a0, a5
259+
; XANDESPERF-NEXT: mv a1, a4
260+
; XANDESPERF-NEXT: bgtz a3, .LBB4_2
261+
; XANDESPERF-NEXT: # %bb.1: # %entry
262+
; XANDESPERF-NEXT: mv a1, a2
263+
; XANDESPERF-NEXT: .LBB4_2: # %entry
264+
; XANDESPERF-NEXT: sw a1, 0(a0)
265+
; XANDESPERF-NEXT: sw a1, 4(a0)
266+
; XANDESPERF-NEXT: sw a4, 120(a0)
267+
; XANDESPERF-NEXT: ret
203268
entry:
204269
%add = add nsw i32 %a, 2048
205270
%cmp = icmp sgt i32 %x, 0
@@ -245,6 +310,20 @@ define void @add_shl_moreOneUse_inSelect(ptr %array1, i64 %a, i64 %b, i64 %x) {
245310
; ZBA-NEXT: sd a5, 48(a0)
246311
; ZBA-NEXT: sd a4, 280(a0)
247312
; ZBA-NEXT: ret
313+
;
314+
; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect:
315+
; XANDESPERF: # %bb.0: # %entry
316+
; XANDESPERF-NEXT: addi a4, a1, 5
317+
; XANDESPERF-NEXT: mv a5, a4
318+
; XANDESPERF-NEXT: bgtz a3, .LBB5_2
319+
; XANDESPERF-NEXT: # %bb.1: # %entry
320+
; XANDESPERF-NEXT: mv a5, a2
321+
; XANDESPERF-NEXT: .LBB5_2: # %entry
322+
; XANDESPERF-NEXT: nds.lea.d a0, a0, a1
323+
; XANDESPERF-NEXT: sd a5, 40(a0)
324+
; XANDESPERF-NEXT: sd a5, 48(a0)
325+
; XANDESPERF-NEXT: sd a4, 280(a0)
326+
; XANDESPERF-NEXT: ret
248327
entry:
249328
%add = add nsw i64 %a, 5
250329
%cmp = icmp sgt i64 %x, 0
@@ -274,6 +353,12 @@ define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
274353
; ZBA-NEXT: ori a0, a0, 1
275354
; ZBA-NEXT: sh1add a0, a0, a0
276355
; ZBA-NEXT: ret
356+
;
357+
; XANDESPERF-LABEL: add_shl_moreOneUse_sh1add:
358+
; XANDESPERF: # %bb.0:
359+
; XANDESPERF-NEXT: ori a0, a0, 1
360+
; XANDESPERF-NEXT: nds.lea.h a0, a0, a0
361+
; XANDESPERF-NEXT: ret
277362
%or = or i64 %x, 1
278363
%mul = shl i64 %or, 1
279364
%add = add i64 %mul, %or
@@ -294,6 +379,12 @@ define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
294379
; ZBA-NEXT: ori a0, a0, 1
295380
; ZBA-NEXT: sh2add a0, a0, a0
296381
; ZBA-NEXT: ret
382+
;
383+
; XANDESPERF-LABEL: add_shl_moreOneUse_sh2add:
384+
; XANDESPERF: # %bb.0:
385+
; XANDESPERF-NEXT: ori a0, a0, 1
386+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a0
387+
; XANDESPERF-NEXT: ret
297388
%or = or i64 %x, 1
298389
%mul = shl i64 %or, 2
299390
%add = add i64 %mul, %or
@@ -314,6 +405,12 @@ define i64 @add_shl_moreOneUse_sh3add(i64 %x) {
314405
; ZBA-NEXT: ori a0, a0, 1
315406
; ZBA-NEXT: sh3add a0, a0, a0
316407
; ZBA-NEXT: ret
408+
;
409+
; XANDESPERF-LABEL: add_shl_moreOneUse_sh3add:
410+
; XANDESPERF: # %bb.0:
411+
; XANDESPERF-NEXT: ori a0, a0, 1
412+
; XANDESPERF-NEXT: nds.lea.d a0, a0, a0
413+
; XANDESPERF-NEXT: ret
317414
%or = or i64 %x, 1
318415
%mul = shl i64 %or, 3
319416
%add = add i64 %mul, %or

llvm/test/CodeGen/RISCV/add_shl_constant.ll

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefixes=RV32,NO-ZBA %s
33
; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck -check-prefixes=RV32,ZBA %s
4+
; RUN: llc -mtriple=riscv32 -mattr=+xandesperf < %s | FileCheck -check-prefixes=RV32,XANDESPERF %s
45

56
define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
67
; NO-ZBA-LABEL: add_shl_oneUse:
@@ -15,6 +16,12 @@ define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
1516
; ZBA-NEXT: addi a0, a0, 123
1617
; ZBA-NEXT: sh3add a0, a0, a1
1718
; ZBA-NEXT: ret
19+
;
20+
; XANDESPERF-LABEL: add_shl_oneUse:
21+
; XANDESPERF: # %bb.0:
22+
; XANDESPERF-NEXT: addi a0, a0, 123
23+
; XANDESPERF-NEXT: nds.lea.d a0, a1, a0
24+
; XANDESPERF-NEXT: ret
1825
%add.0 = add i32 %x, 123
1926
%shl = shl i32 %add.0, 3
2027
%add.1 = add i32 %shl, %y
@@ -40,6 +47,15 @@ define void @add_shl_moreOneUse_inStore(ptr %array1, i32 %a, i32 %b) {
4047
; ZBA-NEXT: sw a2, 24(a0)
4148
; ZBA-NEXT: sw a3, 140(a0)
4249
; ZBA-NEXT: ret
50+
;
51+
; XANDESPERF-LABEL: add_shl_moreOneUse_inStore:
52+
; XANDESPERF: # %bb.0: # %entry
53+
; XANDESPERF-NEXT: addi a3, a1, 5
54+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
55+
; XANDESPERF-NEXT: sw a2, 20(a0)
56+
; XANDESPERF-NEXT: sw a2, 24(a0)
57+
; XANDESPERF-NEXT: sw a3, 140(a0)
58+
; XANDESPERF-NEXT: ret
4359
entry:
4460
%add = add nsw i32 %a, 5
4561
%arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -77,6 +93,18 @@ define void @add_shl_moreOneUse_inStore_addexceedsign12(ptr %array1, i32 %a, i32
7793
; ZBA-NEXT: sw a3, 4(a0)
7894
; ZBA-NEXT: sw a2, 120(a0)
7995
; ZBA-NEXT: ret
96+
;
97+
; XANDESPERF-LABEL: add_shl_moreOneUse_inStore_addexceedsign12:
98+
; XANDESPERF: # %bb.0: # %entry
99+
; XANDESPERF-NEXT: addi a3, a1, 2047
100+
; XANDESPERF-NEXT: lui a4, 2
101+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
102+
; XANDESPERF-NEXT: addi a3, a3, 1
103+
; XANDESPERF-NEXT: add a0, a0, a4
104+
; XANDESPERF-NEXT: sw a2, 0(a0)
105+
; XANDESPERF-NEXT: sw a3, 4(a0)
106+
; XANDESPERF-NEXT: sw a2, 120(a0)
107+
; XANDESPERF-NEXT: ret
80108
entry:
81109
%add = add nsw i32 %a, 2048
82110
%arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -118,6 +146,20 @@ define void @add_shl_moreOneUse_inSelect(ptr %array1, i32 %a, i32 %b, i32 %x) {
118146
; ZBA-NEXT: sw a5, 24(a0)
119147
; ZBA-NEXT: sw a4, 140(a0)
120148
; ZBA-NEXT: ret
149+
;
150+
; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect:
151+
; XANDESPERF: # %bb.0: # %entry
152+
; XANDESPERF-NEXT: addi a4, a1, 5
153+
; XANDESPERF-NEXT: mv a5, a4
154+
; XANDESPERF-NEXT: bgtz a3, .LBB3_2
155+
; XANDESPERF-NEXT: # %bb.1: # %entry
156+
; XANDESPERF-NEXT: mv a5, a2
157+
; XANDESPERF-NEXT: .LBB3_2: # %entry
158+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
159+
; XANDESPERF-NEXT: sw a5, 20(a0)
160+
; XANDESPERF-NEXT: sw a5, 24(a0)
161+
; XANDESPERF-NEXT: sw a4, 140(a0)
162+
; XANDESPERF-NEXT: ret
121163
entry:
122164
%add = add nsw i32 %a, 5
123165
%cmp = icmp sgt i32 %x, 0
@@ -167,6 +209,23 @@ define void @add_shl_moreOneUse_inSelect_addexceedsign12(ptr %array1, i32 %a, i3
167209
; ZBA-NEXT: sw a5, 4(a0)
168210
; ZBA-NEXT: sw a4, 120(a0)
169211
; ZBA-NEXT: ret
212+
;
213+
; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect_addexceedsign12:
214+
; XANDESPERF: # %bb.0: # %entry
215+
; XANDESPERF-NEXT: addi a4, a1, 2047
216+
; XANDESPERF-NEXT: addi a4, a4, 1
217+
; XANDESPERF-NEXT: mv a5, a4
218+
; XANDESPERF-NEXT: bgtz a3, .LBB4_2
219+
; XANDESPERF-NEXT: # %bb.1: # %entry
220+
; XANDESPERF-NEXT: mv a5, a2
221+
; XANDESPERF-NEXT: .LBB4_2: # %entry
222+
; XANDESPERF-NEXT: lui a2, 2
223+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
224+
; XANDESPERF-NEXT: add a0, a0, a2
225+
; XANDESPERF-NEXT: sw a5, 0(a0)
226+
; XANDESPERF-NEXT: sw a5, 4(a0)
227+
; XANDESPERF-NEXT: sw a4, 120(a0)
228+
; XANDESPERF-NEXT: ret
170229
entry:
171230
%add = add nsw i32 %a, 2048
172231
%cmp = icmp sgt i32 %x, 0
@@ -195,6 +254,12 @@ define i32 @add_shl_moreOneUse_sh1add(i32 %x) {
195254
; ZBA-NEXT: ori a0, a0, 1
196255
; ZBA-NEXT: sh1add a0, a0, a0
197256
; ZBA-NEXT: ret
257+
;
258+
; XANDESPERF-LABEL: add_shl_moreOneUse_sh1add:
259+
; XANDESPERF: # %bb.0:
260+
; XANDESPERF-NEXT: ori a0, a0, 1
261+
; XANDESPERF-NEXT: nds.lea.h a0, a0, a0
262+
; XANDESPERF-NEXT: ret
198263
%or = or i32 %x, 1
199264
%mul = shl i32 %or, 1
200265
%add = add i32 %mul, %or
@@ -215,6 +280,12 @@ define i32 @add_shl_moreOneUse_sh2add(i32 %x) {
215280
; ZBA-NEXT: ori a0, a0, 1
216281
; ZBA-NEXT: sh2add a0, a0, a0
217282
; ZBA-NEXT: ret
283+
;
284+
; XANDESPERF-LABEL: add_shl_moreOneUse_sh2add:
285+
; XANDESPERF: # %bb.0:
286+
; XANDESPERF-NEXT: ori a0, a0, 1
287+
; XANDESPERF-NEXT: nds.lea.w a0, a0, a0
288+
; XANDESPERF-NEXT: ret
218289
%or = or i32 %x, 1
219290
%mul = shl i32 %or, 2
220291
%add = add i32 %mul, %or
@@ -235,6 +306,12 @@ define i32 @add_shl_moreOneUse_sh3add(i32 %x) {
235306
; ZBA-NEXT: ori a0, a0, 1
236307
; ZBA-NEXT: sh3add a0, a0, a0
237308
; ZBA-NEXT: ret
309+
;
310+
; XANDESPERF-LABEL: add_shl_moreOneUse_sh3add:
311+
; XANDESPERF: # %bb.0:
312+
; XANDESPERF-NEXT: ori a0, a0, 1
313+
; XANDESPERF-NEXT: nds.lea.d a0, a0, a0
314+
; XANDESPERF-NEXT: ret
238315
%or = or i32 %x, 1
239316
%mul = shl i32 %or, 3
240317
%add = add i32 %mul, %or

0 commit comments

Comments
 (0)