Skip to content

Commit 0e41f80

Browse files
committed
[LoopInterchange] Avoid using CacheCost if cache line size is zero
Profitability decisions based on `CacheCost` sometimes gave strange results when the cache line size was zero. This patch prevents `CacheCost` from being used when the cache line size is zero, because a cost model built on cache lines is not meaningful in that case. The patch also skips computing `CacheCost` in this case, which may reduce compilation time.
1 parent ad38c4c commit 0e41f80

File tree

2 files changed

+71
-2
lines changed

2 files changed

+71
-2
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,12 @@ std::optional<bool>
11301130
LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
11311131
const DenseMap<const Loop *, unsigned> &CostMap,
11321132
std::unique_ptr<CacheCost> &CC) {
1133+
// The `CacheCost` is not calculated if it is not considered worthwhile to use
1134+
// it. In this case we leave the profitability decision to the subsequent
1135+
// processes.
1136+
if (CC == nullptr)
1137+
return std::nullopt;
1138+
11331139
// This is the new cost model returned from loop cache analysis.
11341140
// A smaller index means the loop should be placed as an outer loop, and vice
11351141
// versa.
@@ -1773,8 +1779,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17731779
});
17741780

17751781
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
1776-
std::unique_ptr<CacheCost> CC =
1777-
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1782+
1783+
std::unique_ptr<CacheCost> CC;
1784+
// If the cache line size is set to zero, it doesn't make sense to use
1785+
// `CacheCost` for profitability decisions. Avoid computing it in this case.
1786+
if (AR.TTI.getCacheLineSize() != 0)
1787+
CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
17781788

17791789
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
17801790
return PreservedAnalyses::all();
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; RUN: opt %s -passes=loop-interchange -cache-line-size=0 -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
2+
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -disable-output
3+
; RUN: FileCheck -input-file %t %s
4+
5+
;; In the following code, interchanging is unprofitable even if the cache line
6+
;; size is set to zero. There are cases where the default cache line size is
7+
;; zero, e.g., when the target processor is not specified.
8+
;;
9+
;; #define N 100
10+
;; #define M 100
11+
;;
12+
;; // Extracted from SingleSource/Benchmarks/Polybench/datamining/correlation/correlation.c
13+
;; // in llvm-test-suite
14+
;; void f(double data[N][M], double mean[M], double stddev[M]) {
15+
;; for (int i = 0; i < N; i++) {
16+
;; for (int j = 0; j < M; j++) {
17+
;; data[i][j] -= mean[j];
18+
;; data[i][j] /= stddev[j];
19+
;; }
20+
;; }
21+
;; }
22+
23+
; CHECK: --- !Missed
24+
; CHECK-NEXT: Pass: loop-interchange
25+
; CHECK: Name: InterchangeNotProfitable
26+
; CHECK-NEXT: Function: f
27+
28+
define void @f(ptr noundef captures(none) %data, ptr noundef readonly captures(none) %mean, ptr noundef readonly captures(none) %stddev) {
29+
entry:
30+
br label %for.cond1.preheader
31+
32+
for.cond1.preheader:
33+
%indvars.iv30 = phi i64 [ 0, %entry ], [ %indvars.iv.next31, %for.cond.cleanup3 ]
34+
br label %for.body4
35+
36+
for.cond.cleanup:
37+
ret void
38+
39+
for.cond.cleanup3:
40+
%indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1
41+
%exitcond33 = icmp ne i64 %indvars.iv.next31, 100
42+
br i1 %exitcond33, label %for.cond1.preheader, label %for.cond.cleanup
43+
44+
for.body4:
45+
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
46+
%arrayidx = getelementptr inbounds nuw double, ptr %mean, i64 %indvars.iv
47+
%0 = load double, ptr %arrayidx, align 8
48+
%arrayidx8 = getelementptr inbounds nuw [100 x double], ptr %data, i64 %indvars.iv30, i64 %indvars.iv
49+
%1 = load double, ptr %arrayidx8, align 8
50+
%sub = fsub double %1, %0
51+
store double %sub, ptr %arrayidx8, align 8
52+
%arrayidx10 = getelementptr inbounds nuw double, ptr %stddev, i64 %indvars.iv
53+
%2 = load double, ptr %arrayidx10, align 8
54+
%div = fdiv double %sub, %2
55+
store double %div, ptr %arrayidx8, align 8
56+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
57+
%exitcond = icmp ne i64 %indvars.iv.next, 100
58+
br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
59+
}

0 commit comments

Comments
 (0)