Skip to content

Conversation

@zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Nov 14, 2025

Similar to several other targets, this commit overrides
isLSRCostLess to make the instruction count the first priority
when the LSR pass compares costs.

In addition, this commit accounts in NumRegs for the extra
temporary register that may be needed. This is the same as RISC-V;
see #92296 for the reasoning.

…rity

Similar to several other targets, this commit overrides
`isLSRCostLess` to make the instruction count the first priority
when the LSR pass compares costs.

In addition, this commit accounts in `NumRegs` for the extra
temporary register that may be needed. This is the same as RISC-V;
see #92296 for the reasoning.
@llvmbot
Copy link
Member

llvmbot commented Nov 14, 2025

@llvm/pr-subscribers-backend-loongarch

@llvm/pr-subscribers-llvm-transforms

Author: ZhaoQi (zhaoqi5)

Changes

Similar to several other targets, this commit overrides
isLSRCostLess to make the instruction count the first priority
when the LSR pass compares costs.

In addition, this commit accounts in NumRegs for the extra
temporary register that may be needed. This is the same as RISC-V;
see #92296 for the reasoning.


Full diff: https://github.com/llvm/llvm-project/pull/168035.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp (+14)
  • (modified) llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h (+3)
  • (modified) llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll (+29-35)
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 5107c8def3799..199af1f0dc29b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -133,3 +133,17 @@ LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 
   return Options;
 }
+
+bool LoongArchTTIImpl::isLSRCostLess(
+    const TargetTransformInfo::LSRCost &C1,
+    const TargetTransformInfo::LSRCost &C2) const {
+  // LoongArch specific here are "instruction number 1st priority".
+  // If we need to emit adds inside the loop to add up base registers, then
+  // we need at least one extra temporary register.
+  unsigned C1NumRegs = C1.NumRegs + (C1.NumBaseAdds != 0);
+  unsigned C2NumRegs = C2.NumRegs + (C2.NumBaseAdds != 0);
+  return std::tie(C1.Insns, C1NumRegs, C1.AddRecCost, C1.NumIVMuls,
+                  C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+         std::tie(C2.Insns, C2NumRegs, C2.AddRecCost, C2.NumIVMuls,
+                  C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 9b479f9dc0dc5..247a4e87499e2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -57,6 +57,9 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
 
   TTI::MemCmpExpansionOptions
   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
+
+  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+                     const TargetTransformInfo::LSRCost &C2) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
index 0bb1d43617011..a09a9a37034e1 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
@@ -5,20 +5,18 @@
 define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q) nounwind {
 ; CHECK-LLC-LABEL: foo:
 ; CHECK-LLC:       # %bb.0: # %entry
-; CHECK-LLC-NEXT:    lu12i.w $a3, -1
-; CHECK-LLC-NEXT:    lu12i.w $a4, 1
+; CHECK-LLC-NEXT:    move $a3, $zero
+; CHECK-LLC-NEXT:    ori $a4, $zero, 1024
 ; CHECK-LLC-NEXT:    .p2align 4, , 16
 ; CHECK-LLC-NEXT:  .LBB0_1: # %for.body
 ; CHECK-LLC-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-LLC-NEXT:    add.d $a5, $a0, $a3
-; CHECK-LLC-NEXT:    ldx.w $a5, $a5, $a4
-; CHECK-LLC-NEXT:    add.d $a6, $a1, $a3
-; CHECK-LLC-NEXT:    ldx.w $a6, $a6, $a4
+; CHECK-LLC-NEXT:    ldx.w $a5, $a0, $a3
+; CHECK-LLC-NEXT:    ldx.w $a6, $a1, $a3
 ; CHECK-LLC-NEXT:    add.d $a5, $a6, $a5
-; CHECK-LLC-NEXT:    add.d $a6, $a2, $a3
+; CHECK-LLC-NEXT:    stx.w $a5, $a2, $a3
+; CHECK-LLC-NEXT:    addi.d $a4, $a4, -1
 ; CHECK-LLC-NEXT:    addi.d $a3, $a3, 4
-; CHECK-LLC-NEXT:    stptr.w $a5, $a6, 4096
-; CHECK-LLC-NEXT:    bnez $a3, .LBB0_1
+; CHECK-LLC-NEXT:    bnez $a4, .LBB0_1
 ; CHECK-LLC-NEXT:  # %bb.2: # %for.cond.cleanup
 ; CHECK-LLC-NEXT:    ret
 ; CHECK-OPT-LABEL: define void @foo(
@@ -28,18 +26,17 @@ define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
 ; CHECK-OPT:       [[FOR_COND_CLEANUP:.*]]:
 ; CHECK-OPT-NEXT:    ret void
 ; CHECK-OPT:       [[FOR_BODY]]:
-; CHECK-OPT-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ -4096, %[[ENTRY]] ]
+; CHECK-OPT-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-OPT-NEXT:    [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 1024, %[[ENTRY]] ]
 ; CHECK-OPT-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]]
-; CHECK-OPT-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 4096
-; CHECK-OPT-NEXT:    [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP5]], align 4
+; CHECK-OPT-NEXT:    [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
 ; CHECK-OPT-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]]
-; CHECK-OPT-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4096
-; CHECK-OPT-NEXT:    [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
+; CHECK-OPT-NEXT:    [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP2]], align 4
 ; CHECK-OPT-NEXT:    [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
 ; CHECK-OPT-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]]
-; CHECK-OPT-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 4096
-; CHECK-OPT-NEXT:    store i32 [[ADD]], ptr [[SCEVGEP1]], align 4
-; CHECK-OPT-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
+; CHECK-OPT-NEXT:    store i32 [[ADD]], ptr [[SCEVGEP]], align 4
+; CHECK-OPT-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV1]], -1
+; CHECK-OPT-NEXT:    [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV]], 4
 ; CHECK-OPT-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
 ; CHECK-OPT-NEXT:    br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
 ;
@@ -69,19 +66,18 @@ define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
 ; CHECK-LLC-NEXT:    addi.w $a4, $a3, 0
 ; CHECK-LLC-NEXT:    blez $a4, .LBB1_3
 ; CHECK-LLC-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-LLC-NEXT:    move $a4, $zero
 ; CHECK-LLC-NEXT:    bstrpick.d $a3, $a3, 31, 0
+; CHECK-LLC-NEXT:    slli.d $a3, $a3, 2
 ; CHECK-LLC-NEXT:    .p2align 4, , 16
 ; CHECK-LLC-NEXT:  .LBB1_2: # %for.body
 ; CHECK-LLC-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-LLC-NEXT:    ld.w $a4, $a0, 0
-; CHECK-LLC-NEXT:    ld.w $a5, $a1, 0
-; CHECK-LLC-NEXT:    add.d $a4, $a5, $a4
-; CHECK-LLC-NEXT:    st.w $a4, $a2, 0
-; CHECK-LLC-NEXT:    addi.d $a3, $a3, -1
-; CHECK-LLC-NEXT:    addi.d $a2, $a2, 4
-; CHECK-LLC-NEXT:    addi.d $a1, $a1, 4
-; CHECK-LLC-NEXT:    addi.d $a0, $a0, 4
-; CHECK-LLC-NEXT:    bnez $a3, .LBB1_2
+; CHECK-LLC-NEXT:    ldx.w $a5, $a0, $a4
+; CHECK-LLC-NEXT:    ldx.w $a6, $a1, $a4
+; CHECK-LLC-NEXT:    add.d $a5, $a6, $a5
+; CHECK-LLC-NEXT:    stx.w $a5, $a2, $a4
+; CHECK-LLC-NEXT:    addi.d $a4, $a4, 4
+; CHECK-LLC-NEXT:    bne $a3, $a4, .LBB1_2
 ; CHECK-LLC-NEXT:  .LBB1_3: # %for.cond.cleanup
 ; CHECK-LLC-NEXT:    ret
 ; CHECK-OPT-LABEL: define void @bar(
@@ -91,25 +87,23 @@ define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
 ; CHECK-OPT-NEXT:    br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
 ; CHECK-OPT:       [[FOR_BODY_PREHEADER]]:
 ; CHECK-OPT-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-OPT-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
 ; CHECK-OPT-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK-OPT:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
 ; CHECK-OPT-NEXT:    br label %[[FOR_COND_CLEANUP]]
 ; CHECK-OPT:       [[FOR_COND_CLEANUP]]:
 ; CHECK-OPT-NEXT:    ret void
 ; CHECK-OPT:       [[FOR_BODY]]:
-; CHECK-OPT-NEXT:    [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ]
-; CHECK-OPT-NEXT:    [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[FOR_BODY]] ], [ [[Y]], %[[FOR_BODY_PREHEADER]] ]
-; CHECK-OPT-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_BODY]] ], [ [[Q]], %[[FOR_BODY_PREHEADER]] ]
-; CHECK-OPT-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[WIDE_TRIP_COUNT]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT:    [[LSR_IV4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]]
 ; CHECK-OPT-NEXT:    [[LDTMP:%.*]] = load i32, ptr [[LSR_IV4]], align 4
+; CHECK-OPT-NEXT:    [[LSR_IV2:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]]
 ; CHECK-OPT-NEXT:    [[LDTMP1:%.*]] = load i32, ptr [[LSR_IV2]], align 4
 ; CHECK-OPT-NEXT:    [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
+; CHECK-OPT-NEXT:    [[LSR_IV1:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]]
 ; CHECK-OPT-NEXT:    store i32 [[ADD]], ptr [[LSR_IV1]], align 4
-; CHECK-OPT-NEXT:    [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
-; CHECK-OPT-NEXT:    [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-OPT-NEXT:    [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
-; CHECK-OPT-NEXT:    [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4
-; CHECK-OPT-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-OPT-NEXT:    [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-OPT-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], [[LSR_IV_NEXT]]
 ; CHECK-OPT-NEXT:    br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]]
 ;
 entry:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants