Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,12 @@ std::optional<bool>
LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
const DenseMap<const Loop *, unsigned> &CostMap,
std::unique_ptr<CacheCost> &CC) {
// The `CacheCost` is not calculated if it is not considered worthwhile to use
// it. In this case we leave the profitability decision to the subsequent
// processes.
if (CC == nullptr)
return std::nullopt;

// This is the new cost model returned from loop cache analysis.
// A smaller index means the loop should be placed an outer loop, and vice
// versa.
Expand Down Expand Up @@ -1773,8 +1779,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
});

DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
std::unique_ptr<CacheCost> CC =
CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);

std::unique_ptr<CacheCost> CC;
// If the cache line size is set to zero, it doesn't make sense to use
// `CacheCost` for profitability decisions. Avoid computing it in this case.
if (AR.TTI.getCacheLineSize() != 0)
CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);

if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
return PreservedAnalyses::all();
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/Transforms/LoopInterchange/cache-line-size-zero.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
; RUN: opt %s -passes=loop-interchange -cache-line-size=0 -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -disable-output
; RUN: FileCheck -input-file %t %s

;; In the following code, interchanging is unprofitable even if the cache line
;; size is set to zero. There are cases where the default cache line size is
;; zero, e.g., the target processor is not specified.
;;
;; #define N 100
;; #define M 100
;;
;; // Extracted from SingleSource/Benchmarks/Polybench/datamining/correlation/correlation.c
;; // in llvm-test-suite
;; void f(double data[N][M], double mean[M], double stddev[M]) {
;; for (int i = 0; i < N; i++) {
;; for (int j = 0; j < M; j++) {
;; data[i][j] -= mean[j];
;; data[i][j] /= stddev[j];
;; }
;; }
;; }

; CHECK: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK: Name: InterchangeNotProfitable
; CHECK-NEXT: Function: f

define void @f(ptr noundef captures(none) %data, ptr noundef readonly captures(none) %mean, ptr noundef readonly captures(none) %stddev) {
entry:
br label %for.cond1.preheader

for.cond1.preheader:
%indvars.iv30 = phi i64 [ 0, %entry ], [ %indvars.iv.next31, %for.cond.cleanup3 ]
br label %for.body4

for.cond.cleanup:
ret void

for.cond.cleanup3:
%indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1
%exitcond33 = icmp ne i64 %indvars.iv.next31, 100
br i1 %exitcond33, label %for.cond1.preheader, label %for.cond.cleanup

for.body4:
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
%arrayidx = getelementptr inbounds nuw double, ptr %mean, i64 %indvars.iv
%0 = load double, ptr %arrayidx, align 8
%arrayidx8 = getelementptr inbounds nuw [100 x double], ptr %data, i64 %indvars.iv30, i64 %indvars.iv
%1 = load double, ptr %arrayidx8, align 8
%sub = fsub double %1, %0
store double %sub, ptr %arrayidx8, align 8
%arrayidx10 = getelementptr inbounds nuw double, ptr %stddev, i64 %indvars.iv
%2 = load double, ptr %arrayidx10, align 8
%div = fdiv double %sub, %2
store double %div, ptr %arrayidx8, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 100
br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
}