-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[LoongArch] Override isLSRCostLess to set Insns as the first priority
#168035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/zhaoqi5/tests-lsr
Are you sure you want to change the base?
[LoongArch] Override isLSRCostLess to set Insns as the first priority
#168035
Conversation
…rity Similar to several other targets, this commit overrides `isLSRCostLess` to make the instruction count the first priority when the LSR pass compares costs. In addition, this commit accounts in `NumRegs` for the extra temporary register that may be needed. This is the same as RISC-V; see #92296 for the reason.
|
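@llvm/pr-subscribers-backend-loongarch @llvm/pr-subscribers-llvm-transforms Author: ZhaoQi (zhaoqi5) Changes: Similar to several other targets, this commit overrides `isLSRCostLess` to make the instruction count the first priority when the LSR pass compares costs. In addition, this commit accounts in `NumRegs` for the extra temporary register that may be needed. This is the same as RISC-V; see the reason in #92296. Full diff: https://github.com/llvm/llvm-project/pull/168035.diff 3 Files Affected: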
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
index 5107c8def3799..199af1f0dc29b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -133,3 +133,17 @@ LoongArchTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+
+bool LoongArchTTIImpl::isLSRCostLess(
+ const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) const {
+ // LoongArch-specific ordering: the instruction count (Insns) is compared
+ // first. If adds are needed inside the loop to sum base registers
+ // (NumBaseAdds != 0), at least one extra temporary register is needed too.
+ unsigned C1NumRegs = C1.NumRegs + (C1.NumBaseAdds != 0);
+ unsigned C2NumRegs = C2.NumRegs + (C2.NumBaseAdds != 0);
+ return std::tie(C1.Insns, C1NumRegs, C1.AddRecCost, C1.NumIVMuls,
+ C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
+ std::tie(C2.Insns, C2NumRegs, C2.AddRecCost, C2.NumIVMuls,
+ C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
index 9b479f9dc0dc5..247a4e87499e2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -57,6 +57,9 @@ class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
TTI::MemCmpExpansionOptions
enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
+
+ bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
+ const TargetTransformInfo::LSRCost &C2) const override;
};
} // end namespace llvm
diff --git a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
index 0bb1d43617011..a09a9a37034e1 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/LoongArch/lsr-insns.ll
@@ -5,20 +5,18 @@
define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocapture %q) nounwind {
; CHECK-LLC-LABEL: foo:
; CHECK-LLC: # %bb.0: # %entry
-; CHECK-LLC-NEXT: lu12i.w $a3, -1
-; CHECK-LLC-NEXT: lu12i.w $a4, 1
+; CHECK-LLC-NEXT: move $a3, $zero
+; CHECK-LLC-NEXT: ori $a4, $zero, 1024
; CHECK-LLC-NEXT: .p2align 4, , 16
; CHECK-LLC-NEXT: .LBB0_1: # %for.body
; CHECK-LLC-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-LLC-NEXT: add.d $a5, $a0, $a3
-; CHECK-LLC-NEXT: ldx.w $a5, $a5, $a4
-; CHECK-LLC-NEXT: add.d $a6, $a1, $a3
-; CHECK-LLC-NEXT: ldx.w $a6, $a6, $a4
+; CHECK-LLC-NEXT: ldx.w $a5, $a0, $a3
+; CHECK-LLC-NEXT: ldx.w $a6, $a1, $a3
; CHECK-LLC-NEXT: add.d $a5, $a6, $a5
-; CHECK-LLC-NEXT: add.d $a6, $a2, $a3
+; CHECK-LLC-NEXT: stx.w $a5, $a2, $a3
+; CHECK-LLC-NEXT: addi.d $a4, $a4, -1
; CHECK-LLC-NEXT: addi.d $a3, $a3, 4
-; CHECK-LLC-NEXT: stptr.w $a5, $a6, 4096
-; CHECK-LLC-NEXT: bnez $a3, .LBB0_1
+; CHECK-LLC-NEXT: bnez $a4, .LBB0_1
; CHECK-LLC-NEXT: # %bb.2: # %for.cond.cleanup
; CHECK-LLC-NEXT: ret
; CHECK-OPT-LABEL: define void @foo(
@@ -28,18 +26,17 @@ define void @foo(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
; CHECK-OPT: [[FOR_COND_CLEANUP:.*]]:
; CHECK-OPT-NEXT: ret void
; CHECK-OPT: [[FOR_BODY]]:
-; CHECK-OPT-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ -4096, %[[ENTRY]] ]
+; CHECK-OPT-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], %[[FOR_BODY]] ], [ 0, %[[ENTRY]] ]
+; CHECK-OPT-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 1024, %[[ENTRY]] ]
; CHECK-OPT-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]]
-; CHECK-OPT-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[SCEVGEP4]], i64 4096
-; CHECK-OPT-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP5]], align 4
+; CHECK-OPT-NEXT: [[LDTMP:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
; CHECK-OPT-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]]
-; CHECK-OPT-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4096
-; CHECK-OPT-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
+; CHECK-OPT-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[SCEVGEP2]], align 4
; CHECK-OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
; CHECK-OPT-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]]
-; CHECK-OPT-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 4096
-; CHECK-OPT-NEXT: store i32 [[ADD]], ptr [[SCEVGEP1]], align 4
-; CHECK-OPT-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
+; CHECK-OPT-NEXT: store i32 [[ADD]], ptr [[SCEVGEP]], align 4
+; CHECK-OPT-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV1]], -1
+; CHECK-OPT-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV]], 4
; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
;
@@ -69,19 +66,18 @@ define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
; CHECK-LLC-NEXT: addi.w $a4, $a3, 0
; CHECK-LLC-NEXT: blez $a4, .LBB1_3
; CHECK-LLC-NEXT: # %bb.1: # %for.body.preheader
+; CHECK-LLC-NEXT: move $a4, $zero
; CHECK-LLC-NEXT: bstrpick.d $a3, $a3, 31, 0
+; CHECK-LLC-NEXT: slli.d $a3, $a3, 2
; CHECK-LLC-NEXT: .p2align 4, , 16
; CHECK-LLC-NEXT: .LBB1_2: # %for.body
; CHECK-LLC-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-LLC-NEXT: ld.w $a4, $a0, 0
-; CHECK-LLC-NEXT: ld.w $a5, $a1, 0
-; CHECK-LLC-NEXT: add.d $a4, $a5, $a4
-; CHECK-LLC-NEXT: st.w $a4, $a2, 0
-; CHECK-LLC-NEXT: addi.d $a3, $a3, -1
-; CHECK-LLC-NEXT: addi.d $a2, $a2, 4
-; CHECK-LLC-NEXT: addi.d $a1, $a1, 4
-; CHECK-LLC-NEXT: addi.d $a0, $a0, 4
-; CHECK-LLC-NEXT: bnez $a3, .LBB1_2
+; CHECK-LLC-NEXT: ldx.w $a5, $a0, $a4
+; CHECK-LLC-NEXT: ldx.w $a6, $a1, $a4
+; CHECK-LLC-NEXT: add.d $a5, $a6, $a5
+; CHECK-LLC-NEXT: stx.w $a5, $a2, $a4
+; CHECK-LLC-NEXT: addi.d $a4, $a4, 4
+; CHECK-LLC-NEXT: bne $a3, $a4, .LBB1_2
; CHECK-LLC-NEXT: .LBB1_3: # %for.cond.cleanup
; CHECK-LLC-NEXT: ret
; CHECK-OPT-LABEL: define void @bar(
@@ -91,25 +87,23 @@ define void @bar(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr nocap
; CHECK-OPT-NEXT: br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
; CHECK-OPT: [[FOR_BODY_PREHEADER]]:
; CHECK-OPT-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-OPT-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[WIDE_TRIP_COUNT]], 2
; CHECK-OPT-NEXT: br label %[[FOR_BODY:.*]]
; CHECK-OPT: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
; CHECK-OPT-NEXT: br label %[[FOR_COND_CLEANUP]]
; CHECK-OPT: [[FOR_COND_CLEANUP]]:
; CHECK-OPT-NEXT: ret void
; CHECK-OPT: [[FOR_BODY]]:
-; CHECK-OPT-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[X]], %[[FOR_BODY_PREHEADER]] ]
-; CHECK-OPT-NEXT: [[LSR_IV2:%.*]] = phi ptr [ [[SCEVGEP3:%.*]], %[[FOR_BODY]] ], [ [[Y]], %[[FOR_BODY_PREHEADER]] ]
-; CHECK-OPT-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], %[[FOR_BODY]] ], [ [[Q]], %[[FOR_BODY_PREHEADER]] ]
-; CHECK-OPT-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[WIDE_TRIP_COUNT]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OPT-NEXT: [[LSR_IV4:%.*]] = getelementptr i8, ptr [[X]], i64 [[LSR_IV]]
; CHECK-OPT-NEXT: [[LDTMP:%.*]] = load i32, ptr [[LSR_IV4]], align 4
+; CHECK-OPT-NEXT: [[LSR_IV2:%.*]] = getelementptr i8, ptr [[Y]], i64 [[LSR_IV]]
; CHECK-OPT-NEXT: [[LDTMP1:%.*]] = load i32, ptr [[LSR_IV2]], align 4
; CHECK-OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[LDTMP1]], [[LDTMP]]
+; CHECK-OPT-NEXT: [[LSR_IV1:%.*]] = getelementptr i8, ptr [[Q]], i64 [[LSR_IV]]
; CHECK-OPT-NEXT: store i32 [[ADD]], ptr [[LSR_IV1]], align 4
-; CHECK-OPT-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1
-; CHECK-OPT-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-OPT-NEXT: [[SCEVGEP3]] = getelementptr i8, ptr [[LSR_IV2]], i64 4
-; CHECK-OPT-NEXT: [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4
-; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; CHECK-OPT-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
+; CHECK-OPT-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], [[LSR_IV_NEXT]]
; CHECK-OPT-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]]
;
entry:
|
Similar to several other targets, this commit overrides
`isLSRCostLess` to make the instruction count the first priority
when the LSR pass compares costs.
In addition, this commit accounts in `NumRegs` for the extra
temporary register that may be needed. This is the same as RISC-V; see the reason in #92296.