Skip to content

Conversation

@tclin914
Copy link
Contributor

@tclin914 tclin914 commented Jun 5, 2025

Generate nds.lea.{h,w,d} in more cases when XAndesPerf is enabled, similar to how sh{1,2,3}add is generated with Zba.

tclin914 added 2 commits June 5, 2025 13:44
Generate the code for nds.lea.{h,w,d} similar to sh{1,2,3}add
@llvmbot
Copy link
Member

llvmbot commented Jun 5, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Jim Lin (tclin914)

Changes

Generate the code for nds.lea.{h,w,d} similar to sh{1,2,3}add


Full diff: https://github.com/llvm/llvm-project/pull/142920.diff

3 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-2)
  • (modified) llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll (+97)
  • (modified) llvm/test/CodeGen/RISCV/add_shl_constant.ll (+77)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1f7cf7e857d0f..aa8999755d560 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20413,8 +20413,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
 
     // Bail if we might break a sh{1,2,3}add pattern.
-    if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
-        C2->getZExtValue() <= 3 && N->hasOneUse() &&
+    if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
+        C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
         N->user_begin()->getOpcode() == ISD::ADD &&
         !isUsedByLdSt(*N->user_begin(), nullptr) &&
         !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
index d7f62ae834346..9cb90cc16a519 100644
--- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefixes=RV64,NO-ZBA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck -check-prefixes=RV64,ZBA %s
+; RUN: llc -mtriple=riscv64 -mattr=+xandesperf < %s | FileCheck -check-prefixes=RV64,XANDESPERF %s
 
 define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
 ; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add:
@@ -23,6 +24,16 @@ define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
 ; ZBA-NEXT:    sw a2, 24(a0)
 ; ZBA-NEXT:    sw a3, 140(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a3, a1, 5
+; XANDESPERF-NEXT:    sext.w a1, a1
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    sw a2, 20(a0)
+; XANDESPERF-NEXT:    sw a2, 24(a0)
+; XANDESPERF-NEXT:    sw a3, 140(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 5
   %idxprom = sext i32 %add to i64
@@ -66,6 +77,19 @@ define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b
 ; ZBA-NEXT:    sw a3, 4(a0)
 ; ZBA-NEXT:    sw a2, 120(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a3, a1, 2047
+; XANDESPERF-NEXT:    lui a4, 2
+; XANDESPERF-NEXT:    sext.w a1, a1
+; XANDESPERF-NEXT:    addi a3, a3, 1
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    add a0, a0, a4
+; XANDESPERF-NEXT:    sw a2, 0(a0)
+; XANDESPERF-NEXT:    sw a3, 4(a0)
+; XANDESPERF-NEXT:    sw a2, 120(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 2048
   %idxprom = sext i32 %add to i64
@@ -101,6 +125,16 @@ define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) {
 ; ZBA-NEXT:    sw a2, 24(a0)
 ; ZBA-NEXT:    sd a3, 140(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_sext:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    sext.w a1, a1
+; XANDESPERF-NEXT:    addi a3, a1, 5
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    sw a2, 20(a0)
+; XANDESPERF-NEXT:    sw a2, 24(a0)
+; XANDESPERF-NEXT:    sd a3, 140(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 5
   %idxprom = sext i32 %add to i64
@@ -147,6 +181,20 @@ define void @add_sext_shl_moreOneUse_add_inSelect(ptr %array1, i32 signext  %a,
 ; ZBA-NEXT:    sw a5, 24(a0)
 ; ZBA-NEXT:    sw a4, 140(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a4, a1, 5
+; XANDESPERF-NEXT:    mv a5, a4
+; XANDESPERF-NEXT:    bgtz a3, .LBB3_2
+; XANDESPERF-NEXT:  # %bb.1: # %entry
+; XANDESPERF-NEXT:    mv a5, a2
+; XANDESPERF-NEXT:  .LBB3_2: # %entry
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    sw a5, 20(a0)
+; XANDESPERF-NEXT:    sw a5, 24(a0)
+; XANDESPERF-NEXT:    sw a4, 140(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 5
   %cmp = icmp sgt i32 %x, 0
@@ -200,6 +248,23 @@ define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i
 ; ZBA-NEXT:    sw a1, 4(a0)
 ; ZBA-NEXT:    sw a4, 120(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a4, a1, 2047
+; XANDESPERF-NEXT:    lui a5, 2
+; XANDESPERF-NEXT:    addi a4, a4, 1
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    add a0, a0, a5
+; XANDESPERF-NEXT:    mv a1, a4
+; XANDESPERF-NEXT:    bgtz a3, .LBB4_2
+; XANDESPERF-NEXT:  # %bb.1: # %entry
+; XANDESPERF-NEXT:    mv a1, a2
+; XANDESPERF-NEXT:  .LBB4_2: # %entry
+; XANDESPERF-NEXT:    sw a1, 0(a0)
+; XANDESPERF-NEXT:    sw a1, 4(a0)
+; XANDESPERF-NEXT:    sw a4, 120(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 2048
   %cmp = icmp sgt i32 %x, 0
@@ -245,6 +310,20 @@ define void @add_shl_moreOneUse_inSelect(ptr %array1, i64 %a, i64 %b, i64 %x) {
 ; ZBA-NEXT:    sd a5, 48(a0)
 ; ZBA-NEXT:    sd a4, 280(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a4, a1, 5
+; XANDESPERF-NEXT:    mv a5, a4
+; XANDESPERF-NEXT:    bgtz a3, .LBB5_2
+; XANDESPERF-NEXT:  # %bb.1: # %entry
+; XANDESPERF-NEXT:    mv a5, a2
+; XANDESPERF-NEXT:  .LBB5_2: # %entry
+; XANDESPERF-NEXT:    nds.lea.d a0, a0, a1
+; XANDESPERF-NEXT:    sd a5, 40(a0)
+; XANDESPERF-NEXT:    sd a5, 48(a0)
+; XANDESPERF-NEXT:    sd a4, 280(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i64 %a, 5
   %cmp = icmp sgt i64 %x, 0
@@ -274,6 +353,12 @@ define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
 ; ZBA-NEXT:    ori a0, a0, 1
 ; ZBA-NEXT:    sh1add a0, a0, a0
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh1add:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    ori a0, a0, 1
+; XANDESPERF-NEXT:    nds.lea.h a0, a0, a0
+; XANDESPERF-NEXT:    ret
   %or = or i64 %x, 1
   %mul = shl i64 %or, 1
   %add = add i64 %mul, %or
@@ -294,6 +379,12 @@ define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
 ; ZBA-NEXT:    ori a0, a0, 1
 ; ZBA-NEXT:    sh2add a0, a0, a0
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh2add:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    ori a0, a0, 1
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a0
+; XANDESPERF-NEXT:    ret
   %or = or i64 %x, 1
   %mul = shl i64 %or, 2
   %add = add i64 %mul, %or
@@ -314,6 +405,12 @@ define i64 @add_shl_moreOneUse_sh3add(i64 %x) {
 ; ZBA-NEXT:    ori a0, a0, 1
 ; ZBA-NEXT:    sh3add a0, a0, a0
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh3add:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    ori a0, a0, 1
+; XANDESPERF-NEXT:    nds.lea.d a0, a0, a0
+; XANDESPERF-NEXT:    ret
   %or = or i64 %x, 1
   %mul = shl i64 %or, 3
   %add = add i64 %mul, %or
diff --git a/llvm/test/CodeGen/RISCV/add_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
index a4da9e2683648..bb0456f7fe1a6 100644
--- a/llvm/test/CodeGen/RISCV/add_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv32  < %s | FileCheck -check-prefixes=RV32,NO-ZBA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zba  < %s | FileCheck -check-prefixes=RV32,ZBA %s
+; RUN: llc -mtriple=riscv32 -mattr=+xandesperf  < %s | FileCheck -check-prefixes=RV32,XANDESPERF %s
 
 define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
 ; NO-ZBA-LABEL: add_shl_oneUse:
@@ -15,6 +16,12 @@ define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
 ; ZBA-NEXT:    addi a0, a0, 123
 ; ZBA-NEXT:    sh3add a0, a0, a1
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_oneUse:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    addi a0, a0, 123
+; XANDESPERF-NEXT:    nds.lea.d a0, a1, a0
+; XANDESPERF-NEXT:    ret
   %add.0 = add i32 %x, 123
   %shl = shl i32 %add.0, 3
   %add.1 = add i32 %shl, %y
@@ -40,6 +47,15 @@ define void @add_shl_moreOneUse_inStore(ptr %array1, i32 %a, i32 %b)  {
 ; ZBA-NEXT:    sw a2, 24(a0)
 ; ZBA-NEXT:    sw a3, 140(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inStore:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a3, a1, 5
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    sw a2, 20(a0)
+; XANDESPERF-NEXT:    sw a2, 24(a0)
+; XANDESPERF-NEXT:    sw a3, 140(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 5
   %arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -77,6 +93,18 @@ define void @add_shl_moreOneUse_inStore_addexceedsign12(ptr %array1, i32 %a, i32
 ; ZBA-NEXT:    sw a3, 4(a0)
 ; ZBA-NEXT:    sw a2, 120(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inStore_addexceedsign12:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a3, a1, 2047
+; XANDESPERF-NEXT:    lui a4, 2
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    addi a3, a3, 1
+; XANDESPERF-NEXT:    add a0, a0, a4
+; XANDESPERF-NEXT:    sw a2, 0(a0)
+; XANDESPERF-NEXT:    sw a3, 4(a0)
+; XANDESPERF-NEXT:    sw a2, 120(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 2048
   %arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -118,6 +146,20 @@ define void @add_shl_moreOneUse_inSelect(ptr %array1, i32 %a, i32 %b, i32 %x) {
 ; ZBA-NEXT:    sw a5, 24(a0)
 ; ZBA-NEXT:    sw a4, 140(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a4, a1, 5
+; XANDESPERF-NEXT:    mv a5, a4
+; XANDESPERF-NEXT:    bgtz a3, .LBB3_2
+; XANDESPERF-NEXT:  # %bb.1: # %entry
+; XANDESPERF-NEXT:    mv a5, a2
+; XANDESPERF-NEXT:  .LBB3_2: # %entry
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    sw a5, 20(a0)
+; XANDESPERF-NEXT:    sw a5, 24(a0)
+; XANDESPERF-NEXT:    sw a4, 140(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 5
   %cmp = icmp sgt i32 %x, 0
@@ -167,6 +209,23 @@ define void @add_shl_moreOneUse_inSelect_addexceedsign12(ptr %array1, i32 %a, i3
 ; ZBA-NEXT:    sw a5, 4(a0)
 ; ZBA-NEXT:    sw a4, 120(a0)
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_inSelect_addexceedsign12:
+; XANDESPERF:       # %bb.0: # %entry
+; XANDESPERF-NEXT:    addi a4, a1, 2047
+; XANDESPERF-NEXT:    addi a4, a4, 1
+; XANDESPERF-NEXT:    mv a5, a4
+; XANDESPERF-NEXT:    bgtz a3, .LBB4_2
+; XANDESPERF-NEXT:  # %bb.1: # %entry
+; XANDESPERF-NEXT:    mv a5, a2
+; XANDESPERF-NEXT:  .LBB4_2: # %entry
+; XANDESPERF-NEXT:    lui a2, 2
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a1
+; XANDESPERF-NEXT:    add a0, a0, a2
+; XANDESPERF-NEXT:    sw a5, 0(a0)
+; XANDESPERF-NEXT:    sw a5, 4(a0)
+; XANDESPERF-NEXT:    sw a4, 120(a0)
+; XANDESPERF-NEXT:    ret
 entry:
   %add = add nsw i32 %a, 2048
   %cmp = icmp sgt i32 %x, 0
@@ -195,6 +254,12 @@ define i32 @add_shl_moreOneUse_sh1add(i32 %x) {
 ; ZBA-NEXT:    ori a0, a0, 1
 ; ZBA-NEXT:    sh1add a0, a0, a0
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh1add:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    ori a0, a0, 1
+; XANDESPERF-NEXT:    nds.lea.h a0, a0, a0
+; XANDESPERF-NEXT:    ret
   %or = or i32 %x, 1
   %mul = shl i32 %or, 1
   %add = add i32 %mul, %or
@@ -215,6 +280,12 @@ define i32 @add_shl_moreOneUse_sh2add(i32 %x) {
 ; ZBA-NEXT:    ori a0, a0, 1
 ; ZBA-NEXT:    sh2add a0, a0, a0
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh2add:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    ori a0, a0, 1
+; XANDESPERF-NEXT:    nds.lea.w a0, a0, a0
+; XANDESPERF-NEXT:    ret
   %or = or i32 %x, 1
   %mul = shl i32 %or, 2
   %add = add i32 %mul, %or
@@ -235,6 +306,12 @@ define i32 @add_shl_moreOneUse_sh3add(i32 %x) {
 ; ZBA-NEXT:    ori a0, a0, 1
 ; ZBA-NEXT:    sh3add a0, a0, a0
 ; ZBA-NEXT:    ret
+;
+; XANDESPERF-LABEL: add_shl_moreOneUse_sh3add:
+; XANDESPERF:       # %bb.0:
+; XANDESPERF-NEXT:    ori a0, a0, 1
+; XANDESPERF-NEXT:    nds.lea.d a0, a0, a0
+; XANDESPERF-NEXT:    ret
   %or = or i32 %x, 1
   %mul = shl i32 %or, 3
   %add = add i32 %mul, %or

Copy link
Contributor

@wangpc-pp wangpc-pp left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@tclin914 tclin914 merged commit f8df240 into llvm:main Jun 6, 2025
13 checks passed
@tclin914 tclin914 deleted the xandesperf-commute-shift branch June 6, 2025 03:08
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants