Skip to content

Commit 618e078

Browse files
committed
[DA] batch delinearization
This patch adds support for batch delinearization in DependenceAnalysis, similar to how Polly processes delinearization. Instead of analyzing each pair of memory accesses independently, this approach:

1. Collects all memory accesses in the function, grouped by base pointer.
2. For each base pointer, collects delinearization terms from ALL accesses.
3. Computes array dimensions once using all available terms.
4. Caches the results for use during pairwise dependence analysis.

This leads to better precision because more terms are available when inferring array dimensions, especially for parametric arrays where dimension information may be spread across multiple accesses.

The BatchDelinearization class is defined in Delinearization.h/cpp as a standalone module (similar to the BatchAAResults pattern). Callers (LoopFuse, LoopUnrollAndJam, LoopInterchange, DDG) create the BatchDelinearization themselves and pass it to DependenceInfo.
1 parent ad6e9f9 commit 618e078

File tree

13 files changed

+484
-31
lines changed

13 files changed

+484
-31
lines changed

llvm/include/llvm/Analysis/Delinearization.h

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,23 @@
1616
#ifndef LLVM_ANALYSIS_DELINEARIZATION_H
1717
#define LLVM_ANALYSIS_DELINEARIZATION_H
1818

19+
#include "llvm/ADT/ArrayRef.h"
20+
#include "llvm/ADT/DenseMap.h"
21+
#include "llvm/ADT/SmallVector.h"
1922
#include "llvm/IR/PassManager.h"
2023
#include "llvm/IR/Value.h"
24+
#include "llvm/Support/Compiler.h"
2125

2226
namespace llvm {
27+
class Function;
2328
class raw_ostream;
2429
template <typename T> class SmallVectorImpl;
2530
class GetElementPtrInst;
2631
class Instruction;
32+
class LoopInfo;
2733
class ScalarEvolution;
2834
class SCEV;
35+
class SCEVUnknown;
2936

3037
/// Compute the array dimensions Sizes from the set of Terms extracted from
3138
/// the memory access function of this SCEVAddRecExpr (second step of
@@ -164,6 +171,84 @@ bool getIndexExpressionsFromGEP(ScalarEvolution &SE,
164171
SmallVectorImpl<const SCEV *> &Subscripts,
165172
SmallVectorImpl<const SCEV *> &Sizes);
166173

174+
/// BatchDelinearization - A wrapper for batch delinearization that caches
175+
/// results across multiple queries. Similar to BatchAAResults, this class
176+
/// should be used when analyzing multiple memory accesses to the same base
177+
/// pointers, as it computes array dimensions once using terms from all
178+
/// accesses, leading to better precision.
179+
///
180+
/// This class collects all memory accesses in a function, groups them by base
181+
/// pointer, and computes array dimensions for each base pointer using terms
182+
/// from all accesses. The results are cached for efficient lookups during
183+
/// dependence analysis.
184+
///
185+
/// Usage:
186+
/// BatchDelinearization BD(F, SE, LI);
187+
/// BD.populate(); // Compute and cache delinearization info.
188+
/// // Then pass BD to DependenceInfo or query it directly.
189+
class LLVM_ABI BatchDelinearization {
190+
public:
191+
BatchDelinearization(Function &F, ScalarEvolution &SE, LoopInfo &LI)
192+
: F(F), SE(SE), LI(LI) {}
193+
194+
/// Populate the cache with delinearization information for all memory
195+
/// accesses in the function.
196+
void populate();
197+
198+
/// Check if the cache has been populated.
199+
bool isPopulated() const { return Populated; }
200+
201+
/// Get the cached array sizes for a base pointer.
202+
/// Returns nullptr if not found.
203+
const SmallVector<const SCEV *, 4> *
204+
getArraySizes(const SCEVUnknown *Base) const {
205+
auto It = ArraySizes.find(Base);
206+
return It != ArraySizes.end() ? &It->second : nullptr;
207+
}
208+
209+
/// Get the cached subscripts for an instruction.
210+
/// Returns nullptr if not found.
211+
const SmallVector<const SCEV *, 4> *
212+
getSubscripts(const Instruction *I) const {
213+
auto It = Subscripts.find(I);
214+
return It != Subscripts.end() ? &It->second : nullptr;
215+
}
216+
217+
/// Get the cached element size for a base pointer.
218+
/// Returns nullptr if not found.
219+
const SCEV *getElementSize(const SCEVUnknown *Base) const {
220+
auto It = ElementSizes.find(Base);
221+
return It != ElementSizes.end() ? It->second : nullptr;
222+
}
223+
224+
/// Get the ScalarEvolution instance.
225+
ScalarEvolution &getSE() { return SE; }
226+
const ScalarEvolution &getSE() const { return SE; }
227+
228+
/// Get the LoopInfo instance.
229+
LoopInfo &getLI() { return LI; }
230+
const LoopInfo &getLI() const { return LI; }
231+
232+
private:
233+
Function &F;
234+
ScalarEvolution &SE;
235+
LoopInfo &LI;
236+
237+
/// Map from base pointer to computed array dimension sizes.
238+
SmallDenseMap<const SCEVUnknown *, SmallVector<const SCEV *, 4>, 8>
239+
ArraySizes;
240+
241+
/// Map from instruction to pre-computed subscripts.
242+
SmallDenseMap<const Instruction *, SmallVector<const SCEV *, 4>, 16>
243+
Subscripts;
244+
245+
/// Element size for the array (used for validation).
246+
SmallDenseMap<const SCEVUnknown *, const SCEV *, 8> ElementSizes;
247+
248+
/// Flag indicating whether the cache has been populated.
249+
bool Populated = false;
250+
};
251+
167252
struct DelinearizationPrinterPass
168253
: public PassInfoMixin<DelinearizationPrinterPass> {
169254
explicit DelinearizationPrinterPass(raw_ostream &OS);

llvm/include/llvm/Analysis/DependenceAnalysis.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
namespace llvm {
5050
class AAResults;
5151
template <typename T> class ArrayRef;
52+
class BatchDelinearization;
5253
class Loop;
5354
class LoopInfo;
5455
class SCEVConstant;
@@ -335,8 +336,9 @@ class LLVM_ABI FullDependence final : public Dependence {
335336
/// DependenceInfo - This class is the main dependence-analysis driver.
336337
class DependenceInfo {
337338
public:
338-
DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI)
339-
: AA(AA), SE(SE), LI(LI), F(F) {}
339+
/// Construct the dependence-analysis driver for \p F.
/// \p BD is optional and defaults to nullptr; the pointer is stored as given.
/// NOTE(review): nothing visible here takes ownership of \p BD - presumably
/// the caller must keep it alive for as long as this object uses it; confirm.
DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI,
340+
BatchDelinearization *BD = nullptr)
341+
: AA(AA), SE(SE), LI(LI), F(F), BatchDelin(BD) {}
340342

341343
/// Handle transitive invalidation when the cached analysis results go away.
342344
LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -355,11 +357,19 @@ class DependenceInfo {
355357

356358
Function *getFunction() const { return F; }
357359

360+
/// setBatchDelinearization - Set the BatchDelinearization instance to use
361+
/// for cached delinearization results. The pointer is stored as given,
/// replacing whatever was set before; no copy of the pointee is made.
362+
void setBatchDelinearization(BatchDelinearization *BD) { BatchDelin = BD; }
363+
364+
/// getBatchDelinearization - Get the BatchDelinearization instance.
/// Returns the stored pointer unchanged; it is nullptr when the constructor's
/// default was used and no instance has been set since.
365+
BatchDelinearization *getBatchDelinearization() const { return BatchDelin; }
366+
358367
private:
359368
AAResults *AA;
360369
ScalarEvolution *SE;
361370
LoopInfo *LI;
362371
Function *F;
372+
BatchDelinearization *BatchDelin;
363373

364374
/// Subscript - This private struct represents a pair of subscripts from
365375
/// a pair of potentially multi-dimensional array references. We use a

llvm/lib/Analysis/DDG.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//===----------------------------------------------------------------------===//
1111
#include "llvm/Analysis/DDG.h"
1212
#include "llvm/ADT/SCCIterator.h"
13+
#include "llvm/Analysis/Delinearization.h"
1314
#include "llvm/Analysis/LoopInfo.h"
1415
#include "llvm/Analysis/LoopIterator.h"
1516
#include "llvm/Support/CommandLine.h"
@@ -308,7 +309,9 @@ bool DDGBuilder::shouldCreatePiBlocks() const { return CreatePiBlocks; }
308309
// Build the data dependence graph for loop L. A BatchDelinearization for the
// function containing L's header is populated first and handed to the
// DependenceInfo used to construct the graph. AM is not used in this body.
DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
309310
LoopStandardAnalysisResults &AR) {
310311
Function *F = L.getHeader()->getParent();
311-
DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
312+
// NOTE(review): BD is a local whose address is stored inside DI. If
// DataDependenceGraph keeps DI (or a copy of it) and queries it after this
// function returns, that pointer dangles - confirm against DDG.h.
// NOTE(review): populate() scans every instruction of F, and this function
// runs once per loop, so the function-wide work is repeated for each loop.
BatchDelinearization BD(*F, AR.SE, AR.LI);
313+
BD.populate();
314+
DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI, &BD);
312315
return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
313316
}
314317
AnalysisKey DDGAnalysis::Key;

llvm/lib/Analysis/Delinearization.cpp

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -960,3 +960,140 @@ PreservedAnalyses DelinearizationPrinterPass::run(Function &F,
960960
&AM.getResult<ScalarEvolutionAnalysis>(F));
961961
return PreservedAnalyses::all();
962962
}
963+
964+
//===----------------------------------------------------------------------===//
965+
// BatchDelinearization Implementation
966+
//===----------------------------------------------------------------------===//
967+
968+
/// Return true for a Load or Store instruction.
969+
static bool isLoadOrStore(const Instruction *I) {
970+
return isa<LoadInst>(I) || isa<StoreInst>(I);
971+
}
972+
973+
void BatchDelinearization::populate() {
974+
if (Populated)
975+
return;
976+
977+
Populated = true;
978+
979+
// Step 1: Collect all memory accesses grouped by base pointer.
980+
// Map from base pointer to list of (Instruction, AccessFunction) pairs.
981+
SmallDenseMap<const SCEVUnknown *,
982+
SmallVector<std::pair<Instruction *, const SCEV *>, 4>, 8>
983+
AccessesByBase;
984+
985+
for (Instruction &I : instructions(F)) {
986+
if (!isLoadOrStore(&I))
987+
continue;
988+
989+
Value *Ptr = getLoadStorePointerOperand(&I);
990+
Loop *L = LI.getLoopFor(I.getParent());
991+
const SCEV *AccessFn = SE.getSCEVAtScope(Ptr, L);
992+
const SCEVUnknown *Base =
993+
dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
994+
995+
if (!Base)
996+
continue;
997+
998+
// Only consider accesses where the base is loop invariant.
999+
if (L && !SE.isLoopInvariant(Base, L))
1000+
continue;
1001+
1002+
AccessesByBase[Base].push_back({&I, AccessFn});
1003+
}
1004+
1005+
// Step 2: For each base pointer, collect terms from ALL accesses and
1006+
// compute array dimensions once.
1007+
for (auto &Entry : AccessesByBase) {
1008+
const SCEVUnknown *Base = Entry.first;
1009+
auto &Accesses = Entry.second;
1010+
1011+
// Skip if there's only one access - no benefit from batch processing.
1012+
if (Accesses.size() < 2)
1013+
continue;
1014+
1015+
// Determine element size - use the smallest among all accesses.
1016+
const SCEV *ElemSize = nullptr;
1017+
for (auto &Access : Accesses) {
1018+
const SCEV *EltSize = SE.getElementSize(Access.first);
1019+
if (!ElemSize)
1020+
ElemSize = EltSize;
1021+
else if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, EltSize, ElemSize))
1022+
ElemSize = EltSize;
1023+
}
1024+
1025+
if (!ElemSize)
1026+
continue;
1027+
1028+
ElementSizes[Base] = ElemSize;
1029+
1030+
// Collect parametric terms from all accesses to this base.
1031+
SmallVector<const SCEV *, 8> Terms;
1032+
for (auto &Access : Accesses) {
1033+
const SCEV *AccessFn = Access.second;
1034+
const SCEV *OffsetSCEV = SE.getMinusSCEV(AccessFn, Base);
1035+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffsetSCEV);
1036+
if (AR && AR->isAffine())
1037+
collectParametricTerms(SE, AR, Terms);
1038+
}
1039+
1040+
// Find array dimensions using all collected terms.
1041+
SmallVector<const SCEV *, 4> Sizes;
1042+
findArrayDimensions(SE, Terms, Sizes, ElemSize);
1043+
1044+
// Skip if we couldn't determine dimensions.
1045+
if (Sizes.size() < 2)
1046+
continue;
1047+
1048+
ArraySizes[Base] = Sizes;
1049+
1050+
// Pre-compute subscripts for each access using parametric sizes.
1051+
for (auto &Access : Accesses) {
1052+
Instruction *Inst = Access.first;
1053+
const SCEV *AccessFn = Access.second;
1054+
const SCEV *OffsetSCEV = SE.getMinusSCEV(AccessFn, Base);
1055+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OffsetSCEV);
1056+
1057+
if (!AR || !AR->isAffine())
1058+
continue;
1059+
1060+
SmallVector<const SCEV *, 4> Subs;
1061+
computeAccessFunctions(SE, AR, Subs, Sizes);
1062+
1063+
if (Subs.size() >= 2)
1064+
Subscripts[Inst] = std::move(Subs);
1065+
}
1066+
}
1067+
1068+
// Step 3: Try fixed-size array delinearization for accesses not yet cached.
1069+
// This handles arrays with known compile-time dimensions.
1070+
for (auto &Entry : AccessesByBase) {
1071+
auto &Accesses = Entry.second;
1072+
1073+
for (auto &Access : Accesses) {
1074+
Instruction *Inst = Access.first;
1075+
1076+
// Skip if already cached from parametric delinearization.
1077+
if (Subscripts.count(Inst))
1078+
continue;
1079+
1080+
const SCEV *AccessFn = Access.second;
1081+
const SCEV *ElemSize = SE.getElementSize(Inst);
1082+
SmallVector<const SCEV *, 4> Subs, Sizes;
1083+
1084+
if (delinearizeFixedSizeArray(SE, SE.removePointerBase(AccessFn), Subs,
1085+
Sizes, ElemSize) &&
1086+
Subs.size() >= 2) {
1087+
Subscripts[Inst] = std::move(Subs);
1088+
}
1089+
}
1090+
}
1091+
1092+
LLVM_DEBUG({
1093+
dbgs() << "Batch delinearization cache populated:\n";
1094+
dbgs() << " Base pointers with cached dimensions: " << ArraySizes.size()
1095+
<< "\n";
1096+
dbgs() << " Instructions with cached subscripts: " << Subscripts.size()
1097+
<< "\n";
1098+
});
1099+
}

0 commit comments

Comments
 (0)