Skip to content

Commit eccfa9f

Browse files
committed
[GlobalOpt] Add range metadata to loads from constant global variables
1 parent 6ae5b89 commit eccfa9f

File tree

2 files changed

+668
-0
lines changed

2 files changed

+668
-0
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/IR/Instruction.h"
4646
#include "llvm/IR/Instructions.h"
4747
#include "llvm/IR/IntrinsicInst.h"
48+
#include "llvm/IR/MDBuilder.h"
4849
#include "llvm/IR/Module.h"
4950
#include "llvm/IR/Operator.h"
5051
#include "llvm/IR/Type.h"
@@ -2498,6 +2499,236 @@ OptimizeGlobalAliases(Module &M,
24982499
return Changed;
24992500
}
25002501

2502+
struct AccessPattern {
2503+
Type *Ty;
2504+
2505+
APInt Stride;
2506+
APInt Offset;
2507+
};
2508+
2509+
template <> struct DenseMapInfo<AccessPattern> {
2510+
static inline AccessPattern getEmptyKey() {
2511+
return {(Type *)1, APInt(), APInt()};
2512+
}
2513+
static inline AccessPattern getTombstoneKey() {
2514+
return {(Type *)2, APInt(), APInt()};
2515+
}
2516+
static unsigned getHashValue(const AccessPattern &AP) {
2517+
return hash_combine(AP.Ty, AP.Stride, AP.Offset);
2518+
}
2519+
static bool isEqual(const AccessPattern &LHS, const AccessPattern &RHS) {
2520+
return LHS.Ty == RHS.Ty && LHS.Stride == RHS.Stride &&
2521+
LHS.Offset == RHS.Offset;
2522+
}
2523+
};
2524+
2525+
// return (gcd, x, y) such that a*x + b*y = gcd
2526+
std::tuple<APInt, APInt, APInt> ExtendedSignedGCD(APInt a, APInt b) {
2527+
unsigned BW = a.getBitWidth();
2528+
APInt x = APInt(BW, 1);
2529+
APInt y = APInt(BW, 0);
2530+
APInt x1 = APInt(BW, 0);
2531+
APInt y1 = APInt(BW, 1);
2532+
2533+
while (b != 0) {
2534+
APInt q = APInt(BW, 0);
2535+
APInt r = APInt(BW, 0);
2536+
APInt::sdivrem(a, b, q, r);
2537+
a = std::move(b);
2538+
b = std::move(r);
2539+
2540+
std::swap(x, x1);
2541+
std::swap(y, y1);
2542+
x1 -= q * x;
2543+
y1 -= q * y;
2544+
}
2545+
return {a, x, y};
2546+
}
2547+
2548+
// Build if possible a new pair of Stride and Offset that are part of the
2549+
// original but are also aligned.
2550+
std::optional<std::pair<APInt, APInt>>
2551+
AlignStrideAndOffset(const APInt &Stride, const APInt &Offset,
2552+
const APInt &Align) {
2553+
// Here Offset * Align is added only to make sure Missing is positive or zero
2554+
APInt Missing = ((Offset * Align) - Offset).urem(Align);
2555+
2556+
// fast path for common case,
2557+
if (Missing == 0)
2558+
return {
2559+
{(Stride * Align).udiv(APIntOps::GreatestCommonDivisor(Stride, Align)),
2560+
Offset}};
2561+
2562+
auto [GCD, X, Y] = ExtendedSignedGCD(Stride, Align);
2563+
assert(APIntOps::GreatestCommonDivisor(Stride, Align) == GCD);
2564+
assert((X * Stride + Y * Align) == GCD);
2565+
2566+
if (Missing.urem(GCD) != 0) {
2567+
// The new Stride + Offset cannot be created because there is no elements in
2568+
// the original that would be properly aligned
2569+
return std::nullopt;
2570+
}
2571+
2572+
APInt StrideAlign = Stride * Align;
2573+
// X could be negative, so we need to use sdiv
2574+
// Here + Offset * Align is added only to make sure Missing is positive
2575+
APInt NumStride =
2576+
(((Missing * X).sdiv(GCD)) + (StrideAlign * Align)).urem(Align);
2577+
2578+
APInt NewStride = StrideAlign.udiv(GCD);
2579+
APInt NewOffset = (Offset + (NumStride * Stride)).urem(NewStride);
2580+
return {{std::move(NewStride), std::move(NewOffset)}};
2581+
}
2582+
2583+
static bool addRangeMetadata(Module &M) {
2584+
const DataLayout &DL = M.getDataLayout();
2585+
bool Changed = false;
2586+
2587+
for (GlobalValue &Global : M.global_values()) {
2588+
2589+
auto *GV = dyn_cast<GlobalVariable>(&Global);
2590+
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
2591+
continue;
2592+
2593+
// To be able to go to the next GlobalVariable with a return
2594+
[&] {
2595+
unsigned IndexBW = DL.getIndexTypeSizeInBits(GV->getType());
2596+
2597+
struct PointerInfo {
2598+
Value *Ptr;
2599+
2600+
// Zero denotes not set
2601+
APInt Stride;
2602+
APInt Offset;
2603+
};
2604+
2605+
// GEPs only take one pointer operand, the one we will come from, so we
2606+
// dont need to do uniqueing during the DFS
2607+
SmallVector<PointerInfo> Stack;
2608+
2609+
// All loads of the global that this code can analyze grouped by access
2610+
// pattern. Loads with the same access pattern can access the same offsets
2611+
// in the global, so they can be treated the same.
2612+
SmallDenseMap<AccessPattern, SmallVector<LoadInst *>> LoadsByAccess;
2613+
2614+
Stack.push_back({GV, APInt(IndexBW, 0), APInt(IndexBW, 0)});
2615+
2616+
while (!Stack.empty()) {
2617+
PointerInfo Curr = Stack.pop_back_val();
2618+
2619+
if (!isa<GlobalVariable>(Curr.Ptr)) {
2620+
if (auto *LI = dyn_cast<LoadInst>(Curr.Ptr)) {
2621+
2622+
if (!LI->getType()->isIntegerTy())
2623+
continue;
2624+
2625+
if (LI->hasMetadata(LLVMContext::MD_range))
2626+
continue;
2627+
2628+
// This is an access at a fixed offset, I expect this is handled
2629+
// elsewhere so we skip it.
2630+
if (Curr.Stride == 0)
2631+
continue;
2632+
2633+
// This case is very rare, but what it means is that we
2634+
// dont know at compile-time what offsets into the Global arrays are
2635+
// safe to access with this load. So we give-up.
2636+
if (LI->getAlign() > GV->getAlign().valueOrOne())
2637+
continue;
2638+
2639+
auto NewStrideAndOffset =
2640+
AlignStrideAndOffset(Curr.Stride, Curr.Offset,
2641+
APInt(IndexBW, LI->getAlign().value()));
2642+
2643+
if (!NewStrideAndOffset) {
2644+
// This load cannot access an offset with the correct alignment
2645+
LI->replaceAllUsesWith(PoisonValue::get(LI->getType()));
2646+
continue;
2647+
}
2648+
2649+
AccessPattern AP{LI->getType(), NewStrideAndOffset->first,
2650+
NewStrideAndOffset->second};
2651+
assert(AP.Stride != 0);
2652+
LoadsByAccess[AP].push_back(LI);
2653+
continue;
2654+
}
2655+
auto *GEP = dyn_cast<GetElementPtrInst>(Curr.Ptr);
2656+
if (!GEP)
2657+
continue;
2658+
2659+
SmallMapVector<Value *, APInt, 4> VarOffsets;
2660+
if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset))
2661+
continue;
2662+
2663+
for (auto [V, Scale] : VarOffsets) {
2664+
2665+
// Commented out because I dont understand why we would need this
2666+
// But it was part of getStrideAndModOffsetOfGEP
2667+
// // Only keep a power of two factor for non-inbounds
2668+
// if (!GEP->isInBounds())
2669+
// Scale =
2670+
// APInt::getOneBitSet(Scale.getBitWidth(),
2671+
// Scale.countr_zero());
2672+
2673+
if (Curr.Stride == 0)
2674+
Curr.Stride = Scale;
2675+
else
2676+
Curr.Stride = APIntOps::GreatestCommonDivisor(Curr.Stride, Scale);
2677+
}
2678+
}
2679+
2680+
for (User *U : Curr.Ptr->users()) {
2681+
if (isa<LoadInst, GetElementPtrInst>(U)) {
2682+
Curr.Ptr = U;
2683+
Stack.push_back(Curr);
2684+
}
2685+
}
2686+
}
2687+
2688+
for (auto [AP, Loads] : LoadsByAccess) {
2689+
{
2690+
APInt SMin = APInt::getSignedMaxValue(AP.Ty->getIntegerBitWidth());
2691+
APInt SMax = APInt::getSignedMinValue(AP.Ty->getIntegerBitWidth());
2692+
2693+
APInt LastValidOffset =
2694+
APInt(IndexBW, DL.getTypeAllocSize(GV->getValueType()) -
2695+
DL.getTypeStoreSize(AP.Ty));
2696+
for (APInt Offset = AP.Offset; Offset.ule(LastValidOffset);
2697+
Offset += AP.Stride) {
2698+
assert(Offset.isAligned(Loads[0]->getAlign()));
2699+
Constant *Cst = ConstantFoldLoadFromConstPtr(GV, AP.Ty, Offset, DL);
2700+
2701+
if (!isa_and_nonnull<ConstantInt>(Cst))
2702+
// Lambda captures of a struct binding is only available starting
2703+
// in C++20, so we skip to the next element with goto
2704+
goto NextGroup;
2705+
2706+
// MD_range is order agnostics
2707+
SMin = APIntOps::smin(SMin, Cst->getUniqueInteger());
2708+
SMax = APIntOps::smax(SMax, Cst->getUniqueInteger());
2709+
}
2710+
2711+
MDBuilder MDHelper(M.getContext());
2712+
2713+
Changed = true;
2714+
if (SMin == SMax) {
2715+
for (LoadInst *LI : Loads)
2716+
LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin));
2717+
} else {
2718+
// The Range is allowed to wrap
2719+
MDNode *RNode = MDHelper.createRange(SMin, SMax + 1);
2720+
for (LoadInst *LI : Loads)
2721+
LI->setMetadata(LLVMContext::MD_range, RNode);
2722+
}
2723+
}
2724+
NextGroup:
2725+
(void)0; // Label expect statements
2726+
}
2727+
}();
2728+
}
2729+
return Changed;
2730+
}
2731+
25012732
static Function *
25022733
FindAtExitLibFunc(Module &M,
25032734
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2887,6 +3118,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
28873118
Changed |= LocalChange;
28883119
}
28893120

3121+
// Add range metadata to loads from constant global variables based on the
3122+
// values that could be loaded from the variable
3123+
Changed |= addRangeMetadata(M);
3124+
28903125
// TODO: Move all global ctors functions to the end of the module for code
28913126
// layout.
28923127

0 commit comments

Comments
 (0)