From 0e41f800886c656ae72eb9e2ad243de4b31b604c Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Wed, 5 Feb 2025 13:01:08 +0000 Subject: [PATCH] [LoopInterchange] Avoid using CacheCost if cache line size is zero Profitability decisions with `CacheCost` sometimes gave strange results when the cache line size was zero. This patch prevents `CacheCost` from being used when the cache line size is zero, because it doesn't make sense. This patch also prevents the `CacheCost` from being calculated in this case, which may reduce compilation time. --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 14 ++++- .../LoopInterchange/cache-line-size-zero.ll | 59 +++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopInterchange/cache-line-size-zero.ll diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index d88fdf41db7a8..adefad9285e42 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1130,6 +1130,12 @@ std::optional LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis( const DenseMap &CostMap, std::unique_ptr &CC) { + // The `CacheCost` is not calculated if it is not considered worthwhile to use + // it. In this case we leave the profitability decision to the subsequent + // processes. + if (CC == nullptr) + return std::nullopt; + // This is the new cost model returned from loop cache analysis. // A smaller index means the loop should be placed an outer loop, and vice // versa. @@ -1773,8 +1779,12 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN, }); DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI); - std::unique_ptr CC = - CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); + + std::unique_ptr CC; + // If the cache line size is set to zero, it doesn't make sense to use + // `CacheCost` for profitability decisions. Avoid computing it in this case. + if (AR.TTI.getCacheLineSize() != 0) + CC = CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI); if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN)) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/LoopInterchange/cache-line-size-zero.ll b/llvm/test/Transforms/LoopInterchange/cache-line-size-zero.ll new file mode 100644 index 0000000000000..bce47bce52325 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/cache-line-size-zero.ll @@ -0,0 +1,59 @@ +; RUN: opt %s -passes=loop-interchange -cache-line-size=0 -pass-remarks-output=%t -verify-dom-info -verify-loop-info \ +; RUN: -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange -disable-output +; RUN: FileCheck -input-file %t %s + +;; In the following code, interchanging is unprofitable even if the cache line +;; size is set to zero. There are cases where the default cache line size is +;; zero, e.g., the target processor is not specified. +;; +;; #define N 100 +;; #define M 100 +;; +;; // Extracted from SingleSource/Benchmarks/Polybench/datamining/correlation/correlation.c +;; // in llvm-test-suite +;; void f(double data[N][M], double mean[M], double stddev[M]) { +;; for (int i = 0; i < N; i++) { +;; for (int j = 0; j < M; j++) { +;; data[i][j] -= mean[j]; +;; data[i][j] /= stddev[j]; +;; } +;; } +;; } + +; CHECK: --- !Missed +; CHECK-NEXT: Pass: loop-interchange +; CHECK: Name: InterchangeNotProfitable +; CHECK-NEXT: Function: f + +define void @f(ptr noundef captures(none) %data, ptr noundef readonly captures(none) %mean, ptr noundef readonly captures(none) %stddev) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: + %indvars.iv30 = phi i64 [ 0, %entry ], [ %indvars.iv.next31, %for.cond.cleanup3 ] + br label %for.body4 + +for.cond.cleanup: + ret void + +for.cond.cleanup3: + %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1 + %exitcond33 = icmp ne i64 %indvars.iv.next31, 100 + br i1 %exitcond33, label %for.cond1.preheader, label %for.cond.cleanup + +for.body4: + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ] + %arrayidx = getelementptr inbounds nuw double, ptr %mean, i64 %indvars.iv + %0 = load double, ptr %arrayidx, align 8 + %arrayidx8 = getelementptr inbounds nuw [100 x double], ptr %data, i64 %indvars.iv30, i64 %indvars.iv + %1 = load double, ptr %arrayidx8, align 8 + %sub = fsub double %1, %0 + store double %sub, ptr %arrayidx8, align 8 + %arrayidx10 = getelementptr inbounds nuw double, ptr %stddev, i64 %indvars.iv + %2 = load double, ptr %arrayidx10, align 8 + %div = fdiv double %sub, %2 + store double %div, ptr %arrayidx8, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.body4, label %for.cond.cleanup3 +}