-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[RISCV] Don't commute with shift if XAndesPerf is enabled #142920
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Generate the code for nds.lea.{h,w,d} similar to sh{1,2,3}add
|
@llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) Changes: Generate the code for nds.lea.{h,w,d} similar to sh{1,2,3}add. Full diff: https://github.com/llvm/llvm-project/pull/142920.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1f7cf7e857d0f..aa8999755d560 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20413,8 +20413,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
// Bail if we might break a sh{1,2,3}add pattern.
- if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
- C2->getZExtValue() <= 3 && N->hasOneUse() &&
+ if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
+ C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
N->user_begin()->getOpcode() == ISD::ADD &&
!isUsedByLdSt(*N->user_begin(), nullptr) &&
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
index d7f62ae834346..9cb90cc16a519 100644
--- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefixes=RV64,NO-ZBA %s
; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck -check-prefixes=RV64,ZBA %s
+; RUN: llc -mtriple=riscv64 -mattr=+xandesperf < %s | FileCheck -check-prefixes=RV64,XANDESPERF %s
define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add:
@@ -23,6 +24,16 @@ define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
; ZBA-NEXT: sw a2, 24(a0)
; ZBA-NEXT: sw a3, 140(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a3, a1, 5
+; XANDESPERF-NEXT: sext.w a1, a1
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: sw a2, 20(a0)
+; XANDESPERF-NEXT: sw a2, 24(a0)
+; XANDESPERF-NEXT: sw a3, 140(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%idxprom = sext i32 %add to i64
@@ -66,6 +77,19 @@ define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b
; ZBA-NEXT: sw a3, 4(a0)
; ZBA-NEXT: sw a2, 120(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a3, a1, 2047
+; XANDESPERF-NEXT: lui a4, 2
+; XANDESPERF-NEXT: sext.w a1, a1
+; XANDESPERF-NEXT: addi a3, a3, 1
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: add a0, a0, a4
+; XANDESPERF-NEXT: sw a2, 0(a0)
+; XANDESPERF-NEXT: sw a3, 4(a0)
+; XANDESPERF-NEXT: sw a2, 120(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%idxprom = sext i32 %add to i64
@@ -101,6 +125,16 @@ define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) {
; ZBA-NEXT: sw a2, 24(a0)
; ZBA-NEXT: sd a3, 140(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_sext:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: sext.w a1, a1
+; XANDESPERF-NEXT: addi a3, a1, 5
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: sw a2, 20(a0)
+; XANDESPERF-NEXT: sw a2, 24(a0)
+; XANDESPERF-NEXT: sd a3, 140(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%idxprom = sext i32 %add to i64
@@ -147,6 +181,20 @@ define void @add_sext_shl_moreOneUse_add_inSelect(ptr %array1, i32 signext %a,
; ZBA-NEXT: sw a5, 24(a0)
; ZBA-NEXT: sw a4, 140(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a4, a1, 5
+; XANDESPERF-NEXT: mv a5, a4
+; XANDESPERF-NEXT: bgtz a3, .LBB3_2
+; XANDESPERF-NEXT: # %bb.1: # %entry
+; XANDESPERF-NEXT: mv a5, a2
+; XANDESPERF-NEXT: .LBB3_2: # %entry
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: sw a5, 20(a0)
+; XANDESPERF-NEXT: sw a5, 24(a0)
+; XANDESPERF-NEXT: sw a4, 140(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%cmp = icmp sgt i32 %x, 0
@@ -200,6 +248,23 @@ define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i
; ZBA-NEXT: sw a1, 4(a0)
; ZBA-NEXT: sw a4, 120(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a4, a1, 2047
+; XANDESPERF-NEXT: lui a5, 2
+; XANDESPERF-NEXT: addi a4, a4, 1
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: add a0, a0, a5
+; XANDESPERF-NEXT: mv a1, a4
+; XANDESPERF-NEXT: bgtz a3, .LBB4_2
+; XANDESPERF-NEXT: # %bb.1: # %entry
+; XANDESPERF-NEXT: mv a1, a2
+; XANDESPERF-NEXT: .LBB4_2: # %entry
+; XANDESPERF-NEXT: sw a1, 0(a0)
+; XANDESPERF-NEXT: sw a1, 4(a0)
+; XANDESPERF-NEXT: sw a4, 120(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%cmp = icmp sgt i32 %x, 0
@@ -245,6 +310,20 @@ define void @add_shl_moreOneUse_inSelect(ptr %array1, i64 %a, i64 %b, i64 %x) {
; ZBA-NEXT: sd a5, 48(a0)
; ZBA-NEXT: sd a4, 280(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a4, a1, 5
+; XANDESPERF-NEXT: mv a5, a4
+; XANDESPERF-NEXT: bgtz a3, .LBB5_2
+; XANDESPERF-NEXT: # %bb.1: # %entry
+; XANDESPERF-NEXT: mv a5, a2
+; XANDESPERF-NEXT: .LBB5_2: # %entry
+; XANDESPERF-NEXT: nds.lea.d a0, a0, a1
+; XANDESPERF-NEXT: sd a5, 40(a0)
+; XANDESPERF-NEXT: sd a5, 48(a0)
+; XANDESPERF-NEXT: sd a4, 280(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i64 %a, 5
%cmp = icmp sgt i64 %x, 0
@@ -274,6 +353,12 @@ define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
; ZBA-NEXT: ori a0, a0, 1
; ZBA-NEXT: sh1add a0, a0, a0
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh1add:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: ori a0, a0, 1
+; XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; XANDESPERF-NEXT: ret
%or = or i64 %x, 1
%mul = shl i64 %or, 1
%add = add i64 %mul, %or
@@ -294,6 +379,12 @@ define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
; ZBA-NEXT: ori a0, a0, 1
; ZBA-NEXT: sh2add a0, a0, a0
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh2add:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: ori a0, a0, 1
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; XANDESPERF-NEXT: ret
%or = or i64 %x, 1
%mul = shl i64 %or, 2
%add = add i64 %mul, %or
@@ -314,6 +405,12 @@ define i64 @add_shl_moreOneUse_sh3add(i64 %x) {
; ZBA-NEXT: ori a0, a0, 1
; ZBA-NEXT: sh3add a0, a0, a0
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh3add:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: ori a0, a0, 1
+; XANDESPERF-NEXT: nds.lea.d a0, a0, a0
+; XANDESPERF-NEXT: ret
%or = or i64 %x, 1
%mul = shl i64 %or, 3
%add = add i64 %mul, %or
diff --git a/llvm/test/CodeGen/RISCV/add_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
index a4da9e2683648..bb0456f7fe1a6 100644
--- a/llvm/test/CodeGen/RISCV/add_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefixes=RV32,NO-ZBA %s
; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck -check-prefixes=RV32,ZBA %s
+; RUN: llc -mtriple=riscv32 -mattr=+xandesperf < %s | FileCheck -check-prefixes=RV32,XANDESPERF %s
define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
; NO-ZBA-LABEL: add_shl_oneUse:
@@ -15,6 +16,12 @@ define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
; ZBA-NEXT: addi a0, a0, 123
; ZBA-NEXT: sh3add a0, a0, a1
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_oneUse:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: addi a0, a0, 123
+; XANDESPERF-NEXT: nds.lea.d a0, a1, a0
+; XANDESPERF-NEXT: ret
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %shl, %y
@@ -40,6 +47,15 @@ define void @add_shl_moreOneUse_inStore(ptr %array1, i32 %a, i32 %b) {
; ZBA-NEXT: sw a2, 24(a0)
; ZBA-NEXT: sw a3, 140(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inStore:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a3, a1, 5
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: sw a2, 20(a0)
+; XANDESPERF-NEXT: sw a2, 24(a0)
+; XANDESPERF-NEXT: sw a3, 140(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -77,6 +93,18 @@ define void @add_shl_moreOneUse_inStore_addexceedsign12(ptr %array1, i32 %a, i32
; ZBA-NEXT: sw a3, 4(a0)
; ZBA-NEXT: sw a2, 120(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inStore_addexceedsign12:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a3, a1, 2047
+; XANDESPERF-NEXT: lui a4, 2
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: addi a3, a3, 1
+; XANDESPERF-NEXT: add a0, a0, a4
+; XANDESPERF-NEXT: sw a2, 0(a0)
+; XANDESPERF-NEXT: sw a3, 4(a0)
+; XANDESPERF-NEXT: sw a2, 120(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -118,6 +146,20 @@ define void @add_shl_moreOneUse_inSelect(ptr %array1, i32 %a, i32 %b, i32 %x) {
; ZBA-NEXT: sw a5, 24(a0)
; ZBA-NEXT: sw a4, 140(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a4, a1, 5
+; XANDESPERF-NEXT: mv a5, a4
+; XANDESPERF-NEXT: bgtz a3, .LBB3_2
+; XANDESPERF-NEXT: # %bb.1: # %entry
+; XANDESPERF-NEXT: mv a5, a2
+; XANDESPERF-NEXT: .LBB3_2: # %entry
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: sw a5, 20(a0)
+; XANDESPERF-NEXT: sw a5, 24(a0)
+; XANDESPERF-NEXT: sw a4, 140(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%cmp = icmp sgt i32 %x, 0
@@ -167,6 +209,23 @@ define void @add_shl_moreOneUse_inSelect_addexceedsign12(ptr %array1, i32 %a, i3
; ZBA-NEXT: sw a5, 4(a0)
; ZBA-NEXT: sw a4, 120(a0)
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect_addexceedsign12:
+; XANDESPERF: # %bb.0: # %entry
+; XANDESPERF-NEXT: addi a4, a1, 2047
+; XANDESPERF-NEXT: addi a4, a4, 1
+; XANDESPERF-NEXT: mv a5, a4
+; XANDESPERF-NEXT: bgtz a3, .LBB4_2
+; XANDESPERF-NEXT: # %bb.1: # %entry
+; XANDESPERF-NEXT: mv a5, a2
+; XANDESPERF-NEXT: .LBB4_2: # %entry
+; XANDESPERF-NEXT: lui a2, 2
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT: add a0, a0, a2
+; XANDESPERF-NEXT: sw a5, 0(a0)
+; XANDESPERF-NEXT: sw a5, 4(a0)
+; XANDESPERF-NEXT: sw a4, 120(a0)
+; XANDESPERF-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%cmp = icmp sgt i32 %x, 0
@@ -195,6 +254,12 @@ define i32 @add_shl_moreOneUse_sh1add(i32 %x) {
; ZBA-NEXT: ori a0, a0, 1
; ZBA-NEXT: sh1add a0, a0, a0
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh1add:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: ori a0, a0, 1
+; XANDESPERF-NEXT: nds.lea.h a0, a0, a0
+; XANDESPERF-NEXT: ret
%or = or i32 %x, 1
%mul = shl i32 %or, 1
%add = add i32 %mul, %or
@@ -215,6 +280,12 @@ define i32 @add_shl_moreOneUse_sh2add(i32 %x) {
; ZBA-NEXT: ori a0, a0, 1
; ZBA-NEXT: sh2add a0, a0, a0
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh2add:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: ori a0, a0, 1
+; XANDESPERF-NEXT: nds.lea.w a0, a0, a0
+; XANDESPERF-NEXT: ret
%or = or i32 %x, 1
%mul = shl i32 %or, 2
%add = add i32 %mul, %or
@@ -235,6 +306,12 @@ define i32 @add_shl_moreOneUse_sh3add(i32 %x) {
; ZBA-NEXT: ori a0, a0, 1
; ZBA-NEXT: sh3add a0, a0, a0
; ZBA-NEXT: ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh3add:
+; XANDESPERF: # %bb.0:
+; XANDESPERF-NEXT: ori a0, a0, 1
+; XANDESPERF-NEXT: nds.lea.d a0, a0, a0
+; XANDESPERF-NEXT: ret
%or = or i32 %x, 1
%mul = shl i32 %or, 3
%add = add i32 %mul, %or
|
wangpc-pp
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
More nds.lea.{h,w,d} are generated, similar to sh{1,2,3}add