|
45 | 45 | #include "llvm/IR/Instruction.h" |
46 | 46 | #include "llvm/IR/Instructions.h" |
47 | 47 | #include "llvm/IR/IntrinsicInst.h" |
| 48 | +#include "llvm/IR/MDBuilder.h" |
48 | 49 | #include "llvm/IR/Module.h" |
49 | 50 | #include "llvm/IR/Operator.h" |
50 | 51 | #include "llvm/IR/Type.h" |
@@ -2498,6 +2499,236 @@ OptimizeGlobalAliases(Module &M, |
2498 | 2499 | return Changed; |
2499 | 2500 | } |
2500 | 2501 |
|
| 2502 | +struct AccessPattern { |
| 2503 | + Type *Ty; |
| 2504 | + |
| 2505 | + APInt Stride; |
| 2506 | + APInt Offset; |
| 2507 | +}; |
| 2508 | + |
| 2509 | +template <> struct DenseMapInfo<AccessPattern> { |
| 2510 | + static inline AccessPattern getEmptyKey() { |
| 2511 | + return {(Type *)1, APInt(), APInt()}; |
| 2512 | + } |
| 2513 | + static inline AccessPattern getTombstoneKey() { |
| 2514 | + return {(Type *)2, APInt(), APInt()}; |
| 2515 | + } |
| 2516 | + static unsigned getHashValue(const AccessPattern &AP) { |
| 2517 | + return hash_combine(AP.Ty, AP.Stride, AP.Offset); |
| 2518 | + } |
| 2519 | + static bool isEqual(const AccessPattern &LHS, const AccessPattern &RHS) { |
| 2520 | + return LHS.Ty == RHS.Ty && LHS.Stride == RHS.Stride && |
| 2521 | + LHS.Offset == RHS.Offset; |
| 2522 | + } |
| 2523 | +}; |
| 2524 | + |
| 2525 | +// return (gcd, x, y) such that a*x + b*y = gcd |
| 2526 | +std::tuple<APInt, APInt, APInt> ExtendedSignedGCD(APInt a, APInt b) { |
| 2527 | + unsigned BW = a.getBitWidth(); |
| 2528 | + APInt x = APInt(BW, 1); |
| 2529 | + APInt y = APInt(BW, 0); |
| 2530 | + APInt x1 = APInt(BW, 0); |
| 2531 | + APInt y1 = APInt(BW, 1); |
| 2532 | + |
| 2533 | + while (b != 0) { |
| 2534 | + APInt q = APInt(BW, 0); |
| 2535 | + APInt r = APInt(BW, 0); |
| 2536 | + APInt::sdivrem(a, b, q, r); |
| 2537 | + a = std::move(b); |
| 2538 | + b = std::move(r); |
| 2539 | + |
| 2540 | + std::swap(x, x1); |
| 2541 | + std::swap(y, y1); |
| 2542 | + x1 -= q * x; |
| 2543 | + y1 -= q * y; |
| 2544 | + } |
| 2545 | + return {a, x, y}; |
| 2546 | +} |
| 2547 | + |
| 2548 | +// Build if possible a new pair of Stride and Offset that are part of the |
| 2549 | +// original but are also aligned. |
| 2550 | +std::optional<std::pair<APInt, APInt>> |
| 2551 | +AlignStrideAndOffset(const APInt &Stride, const APInt &Offset, |
| 2552 | + const APInt &Align) { |
| 2553 | + // Here Offset * Align is added only to make sure Missing is positive or zero |
| 2554 | + APInt Missing = ((Offset * Align) - Offset).urem(Align); |
| 2555 | + |
| 2556 | + // fast path for common case, |
| 2557 | + if (Missing == 0) |
| 2558 | + return { |
| 2559 | + {(Stride * Align).udiv(APIntOps::GreatestCommonDivisor(Stride, Align)), |
| 2560 | + Offset}}; |
| 2561 | + |
| 2562 | + auto [GCD, X, Y] = ExtendedSignedGCD(Stride, Align); |
| 2563 | + assert(APIntOps::GreatestCommonDivisor(Stride, Align) == GCD); |
| 2564 | + assert((X * Stride + Y * Align) == GCD); |
| 2565 | + |
| 2566 | + if (Missing.urem(GCD) != 0) { |
| 2567 | + // The new Stride + Offset cannot be created because there is no elements in |
| 2568 | + // the original that would be properly aligned |
| 2569 | + return std::nullopt; |
| 2570 | + } |
| 2571 | + |
| 2572 | + APInt StrideAlign = Stride * Align; |
| 2573 | + // X could be negative, so we need to use sdiv |
| 2574 | + // Here + Offset * Align is added only to make sure Missing is positive |
| 2575 | + APInt NumStride = |
| 2576 | + (((Missing * X).sdiv(GCD)) + (StrideAlign * Align)).urem(Align); |
| 2577 | + |
| 2578 | + APInt NewStride = StrideAlign.udiv(GCD); |
| 2579 | + APInt NewOffset = (Offset + (NumStride * Stride)).urem(NewStride); |
| 2580 | + return {{std::move(NewStride), std::move(NewOffset)}}; |
| 2581 | +} |
| 2582 | + |
| 2583 | +static bool addRangeMetadata(Module &M) { |
| 2584 | + const DataLayout &DL = M.getDataLayout(); |
| 2585 | + bool Changed = false; |
| 2586 | + |
| 2587 | + for (GlobalValue &Global : M.global_values()) { |
| 2588 | + |
| 2589 | + auto *GV = dyn_cast<GlobalVariable>(&Global); |
| 2590 | + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) |
| 2591 | + continue; |
| 2592 | + |
| 2593 | + // To be able to go to the next GlobalVariable with a return |
| 2594 | + [&] { |
| 2595 | + unsigned IndexBW = DL.getIndexTypeSizeInBits(GV->getType()); |
| 2596 | + |
| 2597 | + struct PointerInfo { |
| 2598 | + Value *Ptr; |
| 2599 | + |
| 2600 | + // Zero denotes not set |
| 2601 | + APInt Stride; |
| 2602 | + APInt Offset; |
| 2603 | + }; |
| 2604 | + |
| 2605 | + // GEPs only take one pointer operand, the one we will come from, so we |
| 2606 | + // dont need to do uniqueing during the DFS |
| 2607 | + SmallVector<PointerInfo> Stack; |
| 2608 | + |
| 2609 | + // All loads of the global that this code can analyze grouped by access |
| 2610 | + // pattern. Loads with the same access pattern can access the same offsets |
| 2611 | + // in the global, so they can be treated the same. |
| 2612 | + SmallDenseMap<AccessPattern, SmallVector<LoadInst *>> LoadsByAccess; |
| 2613 | + |
| 2614 | + Stack.push_back({GV, APInt(IndexBW, 0), APInt(IndexBW, 0)}); |
| 2615 | + |
| 2616 | + while (!Stack.empty()) { |
| 2617 | + PointerInfo Curr = Stack.pop_back_val(); |
| 2618 | + |
| 2619 | + if (!isa<GlobalVariable>(Curr.Ptr)) { |
| 2620 | + if (auto *LI = dyn_cast<LoadInst>(Curr.Ptr)) { |
| 2621 | + |
| 2622 | + if (!LI->getType()->isIntegerTy()) |
| 2623 | + continue; |
| 2624 | + |
| 2625 | + if (LI->hasMetadata(LLVMContext::MD_range)) |
| 2626 | + continue; |
| 2627 | + |
| 2628 | + // This is an access at a fixed offset, I expect this is handled |
| 2629 | + // elsewhere so we skip it. |
| 2630 | + if (Curr.Stride == 0) |
| 2631 | + continue; |
| 2632 | + |
| 2633 | + // This case is very rare, but what it means is that we |
| 2634 | + // dont know at compile-time what offsets into the Global arrays are |
| 2635 | + // safe to access with this load. So we give-up. |
| 2636 | + if (LI->getAlign() > GV->getAlign().valueOrOne()) |
| 2637 | + continue; |
| 2638 | + |
| 2639 | + auto NewStrideAndOffset = |
| 2640 | + AlignStrideAndOffset(Curr.Stride, Curr.Offset, |
| 2641 | + APInt(IndexBW, LI->getAlign().value())); |
| 2642 | + |
| 2643 | + if (!NewStrideAndOffset) { |
| 2644 | + // This load cannot access an offset with the correct alignment |
| 2645 | + LI->replaceAllUsesWith(PoisonValue::get(LI->getType())); |
| 2646 | + continue; |
| 2647 | + } |
| 2648 | + |
| 2649 | + AccessPattern AP{LI->getType(), NewStrideAndOffset->first, |
| 2650 | + NewStrideAndOffset->second}; |
| 2651 | + assert(AP.Stride != 0); |
| 2652 | + LoadsByAccess[AP].push_back(LI); |
| 2653 | + continue; |
| 2654 | + } |
| 2655 | + auto *GEP = dyn_cast<GetElementPtrInst>(Curr.Ptr); |
| 2656 | + if (!GEP) |
| 2657 | + continue; |
| 2658 | + |
| 2659 | + SmallMapVector<Value *, APInt, 4> VarOffsets; |
| 2660 | + if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset)) |
| 2661 | + continue; |
| 2662 | + |
| 2663 | + for (auto [V, Scale] : VarOffsets) { |
| 2664 | + |
| 2665 | + // Commented out because I dont understand why we would need this |
| 2666 | + // But it was part of getStrideAndModOffsetOfGEP |
| 2667 | + // // Only keep a power of two factor for non-inbounds |
| 2668 | + // if (!GEP->isInBounds()) |
| 2669 | + // Scale = |
| 2670 | + // APInt::getOneBitSet(Scale.getBitWidth(), |
| 2671 | + // Scale.countr_zero()); |
| 2672 | + |
| 2673 | + if (Curr.Stride == 0) |
| 2674 | + Curr.Stride = Scale; |
| 2675 | + else |
| 2676 | + Curr.Stride = APIntOps::GreatestCommonDivisor(Curr.Stride, Scale); |
| 2677 | + } |
| 2678 | + } |
| 2679 | + |
| 2680 | + for (User *U : Curr.Ptr->users()) { |
| 2681 | + if (isa<LoadInst, GetElementPtrInst>(U)) { |
| 2682 | + Curr.Ptr = U; |
| 2683 | + Stack.push_back(Curr); |
| 2684 | + } |
| 2685 | + } |
| 2686 | + } |
| 2687 | + |
| 2688 | + for (auto [AP, Loads] : LoadsByAccess) { |
| 2689 | + { |
| 2690 | + APInt SMin = APInt::getSignedMaxValue(AP.Ty->getIntegerBitWidth()); |
| 2691 | + APInt SMax = APInt::getSignedMinValue(AP.Ty->getIntegerBitWidth()); |
| 2692 | + |
| 2693 | + APInt LastValidOffset = |
| 2694 | + APInt(IndexBW, DL.getTypeAllocSize(GV->getValueType()) - |
| 2695 | + DL.getTypeStoreSize(AP.Ty)); |
| 2696 | + for (APInt Offset = AP.Offset; Offset.ule(LastValidOffset); |
| 2697 | + Offset += AP.Stride) { |
| 2698 | + assert(Offset.isAligned(Loads[0]->getAlign())); |
| 2699 | + Constant *Cst = ConstantFoldLoadFromConstPtr(GV, AP.Ty, Offset, DL); |
| 2700 | + |
| 2701 | + if (!isa_and_nonnull<ConstantInt>(Cst)) |
| 2702 | + // Lambda captures of a struct binding is only available starting |
| 2703 | + // in C++20, so we skip to the next element with goto |
| 2704 | + goto NextGroup; |
| 2705 | + |
| 2706 | + // MD_range is order agnostics |
| 2707 | + SMin = APIntOps::smin(SMin, Cst->getUniqueInteger()); |
| 2708 | + SMax = APIntOps::smax(SMax, Cst->getUniqueInteger()); |
| 2709 | + } |
| 2710 | + |
| 2711 | + MDBuilder MDHelper(M.getContext()); |
| 2712 | + |
| 2713 | + Changed = true; |
| 2714 | + if (SMin == SMax) { |
| 2715 | + for (LoadInst *LI : Loads) |
| 2716 | + LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin)); |
| 2717 | + } else { |
| 2718 | + // The Range is allowed to wrap |
| 2719 | + MDNode *RNode = MDHelper.createRange(SMin, SMax + 1); |
| 2720 | + for (LoadInst *LI : Loads) |
| 2721 | + LI->setMetadata(LLVMContext::MD_range, RNode); |
| 2722 | + } |
| 2723 | + } |
| 2724 | + NextGroup: |
| 2725 | + (void)0; // Label expect statements |
| 2726 | + } |
| 2727 | + }(); |
| 2728 | + } |
| 2729 | + return Changed; |
| 2730 | +} |
| 2731 | + |
2501 | 2732 | static Function * |
2502 | 2733 | FindAtExitLibFunc(Module &M, |
2503 | 2734 | function_ref<TargetLibraryInfo &(Function &)> GetTLI, |
@@ -2887,6 +3118,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL, |
2887 | 3118 | Changed |= LocalChange; |
2888 | 3119 | } |
2889 | 3120 |
|
| 3121 | + // Add range metadata to loads from constant global variables based on the |
| 3122 | + // values that could be loaded from the variable |
| 3123 | + Changed |= addRangeMetadata(M); |
| 3124 | + |
2890 | 3125 | // TODO: Move all global ctors functions to the end of the module for code |
2891 | 3126 | // layout. |
2892 | 3127 |
|
|
0 commit comments