@@ -2641,6 +2641,165 @@ DeleteDeadIFuncs(Module &M,
26412641 return Changed;
26422642}
26432643
2644+ // Follows the use-def chain of \p V backwards until it finds a Function,
2645+ // in which case it collects in \p Versions. Return true on successful
2646+ // use-def chain traversal, false otherwise.
2647+ static bool collectVersions (TargetTransformInfo &TTI, Value *V,
2648+ SmallVectorImpl<Function *> &Versions) {
2649+ if (auto *F = dyn_cast<Function>(V)) {
2650+ if (!TTI.isMultiversionedFunction (*F))
2651+ return false ;
2652+ Versions.push_back (F);
2653+ } else if (auto *Sel = dyn_cast<SelectInst>(V)) {
2654+ if (!collectVersions (TTI, Sel->getTrueValue (), Versions))
2655+ return false ;
2656+ if (!collectVersions (TTI, Sel->getFalseValue (), Versions))
2657+ return false ;
2658+ } else if (auto *Phi = dyn_cast<PHINode>(V)) {
2659+ for (unsigned I = 0 , E = Phi->getNumIncomingValues (); I != E; ++I)
2660+ if (!collectVersions (TTI, Phi->getIncomingValue (I), Versions))
2661+ return false ;
2662+ } else {
2663+ // Unknown instruction type. Bail.
2664+ return false ;
2665+ }
2666+ return true ;
2667+ }
2668+
2669+ // Bypass the IFunc Resolver of MultiVersioned functions when possible. To
2670+ // deduce whether the optimization is legal we need to compare the target
2671+ // features between caller and callee versions. The criteria for bypassing
2672+ // the resolver are the following:
2673+ //
2674+ // * If the callee's feature set is a subset of the caller's feature set,
2675+ // then the callee is a candidate for direct call.
2676+ //
2677+ // * Among such candidates the one of highest priority is the best match
2678+ // and it shall be picked, unless there is a version of the callee with
2679+ // higher priority than the best match which cannot be picked from a
2680+ // higher priority caller (directly or through the resolver).
2681+ //
2682+ // * For every higher priority callee version than the best match, there
2683+ // is a higher priority caller version whose feature set availability
2684+ // is implied by the callee's feature set.
2685+ //
2686+ static bool OptimizeNonTrivialIFuncs (
2687+ Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2688+ bool Changed = false ;
2689+
2690+ // Cache containing the mask constructed from a function's target features.
2691+ DenseMap<Function *, uint64_t > FeatureMask;
2692+
2693+ for (GlobalIFunc &IF : M.ifuncs ()) {
2694+ if (IF.isInterposable ())
2695+ continue ;
2696+
2697+ Function *Resolver = IF.getResolverFunction ();
2698+ if (!Resolver)
2699+ continue ;
2700+
2701+ if (Resolver->isInterposable ())
2702+ continue ;
2703+
2704+ TargetTransformInfo &TTI = GetTTI (*Resolver);
2705+
2706+ // Discover the callee versions.
2707+ SmallVector<Function *> Callees;
2708+ if (any_of (*Resolver, [&TTI, &Callees](BasicBlock &BB) {
2709+ if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator ()))
2710+ if (!collectVersions (TTI, Ret->getReturnValue (), Callees))
2711+ return true ;
2712+ return false ;
2713+ }))
2714+ continue ;
2715+
2716+ assert (!Callees.empty () && " Expecting successful collection of versions" );
2717+
2718+ // Cache the feature mask for each callee.
2719+ for (Function *Callee : Callees) {
2720+ auto [It, Inserted] = FeatureMask.try_emplace (Callee);
2721+ if (Inserted)
2722+ It->second = TTI.getFeatureMask (*Callee);
2723+ }
2724+
2725+ // Sort the callee versions in decreasing priority order.
2726+ sort (Callees, [&](auto *LHS, auto *RHS) {
2727+ return FeatureMask[LHS] > FeatureMask[RHS];
2728+ });
2729+
2730+ // Find the callsites and cache the feature mask for each caller.
2731+ SmallVector<Function *> Callers;
2732+ DenseMap<Function *, SmallVector<CallBase *>> CallSites;
2733+ for (User *U : IF.users ()) {
2734+ if (auto *CB = dyn_cast<CallBase>(U)) {
2735+ if (CB->getCalledOperand () == &IF) {
2736+ Function *Caller = CB->getFunction ();
2737+ auto [FeatIt, FeatInserted] = FeatureMask.try_emplace (Caller);
2738+ if (FeatInserted)
2739+ FeatIt->second = TTI.getFeatureMask (*Caller);
2740+ auto [CallIt, CallInserted] = CallSites.try_emplace (Caller);
2741+ if (CallInserted)
2742+ Callers.push_back (Caller);
2743+ CallIt->second .push_back (CB);
2744+ }
2745+ }
2746+ }
2747+
2748+ // Sort the caller versions in decreasing priority order.
2749+ sort (Callers, [&](auto *LHS, auto *RHS) {
2750+ return FeatureMask[LHS] > FeatureMask[RHS];
2751+ });
2752+
2753+ auto implies = [](uint64_t A, uint64_t B) { return (A & B) == B; };
2754+
2755+ // Index to the highest priority candidate.
2756+ unsigned I = 0 ;
2757+ // Now try to redirect calls starting from higher priority callers.
2758+ for (Function *Caller : Callers) {
2759+ assert (I < Callees.size () && " Found callers of equal priority" );
2760+
2761+ Function *Callee = Callees[I];
2762+ uint64_t CallerBits = FeatureMask[Caller];
2763+ uint64_t CalleeBits = FeatureMask[Callee];
2764+
2765+ // In the case of FMV callers, we know that all higher priority callers
2766+ // than the current one did not get selected at runtime, which helps
2767+ // reason about the callees (if they have versions that mandate presence
2768+ // of the features which we already know are unavailable on this target).
2769+ if (TTI.isMultiversionedFunction (*Caller)) {
2770+ // If the feature set of the caller implies the feature set of the
2771+ // highest priority candidate then it shall be picked. In case of
2772+ // identical sets advance the candidate index one position.
2773+ if (CallerBits == CalleeBits)
2774+ ++I;
2775+ else if (!implies (CallerBits, CalleeBits)) {
2776+ // Keep advancing the candidate index as long as the caller's
2777+ // features are a subset of the current candidate's.
2778+ while (implies (CalleeBits, CallerBits)) {
2779+ if (++I == Callees.size ())
2780+ break ;
2781+ CalleeBits = FeatureMask[Callees[I]];
2782+ }
2783+ continue ;
2784+ }
2785+ } else {
2786+ // We can't reason much about non-FMV callers. Just pick the highest
2787+ // priority callee if it matches, otherwise bail.
2788+ if (I > 0 || !implies (CallerBits, CalleeBits))
2789+ continue ;
2790+ }
2791+ auto &Calls = CallSites[Caller];
2792+ for (CallBase *CS : Calls)
2793+ CS->setCalledOperand (Callee);
2794+ Changed = true ;
2795+ }
2796+ if (IF.use_empty () ||
2797+ all_of (IF.users (), [](User *U) { return isa<GlobalAlias>(U); }))
2798+ NumIFuncsResolved++;
2799+ }
2800+ return Changed;
2801+ }
2802+
26442803static bool
26452804optimizeGlobalsInModule (Module &M, const DataLayout &DL,
26462805 function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2707,6 +2866,9 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
27072866 // Optimize IFuncs whose callee's are statically known.
27082867 LocalChange |= OptimizeStaticIFuncs (M);
27092868
2869+ // Optimize IFuncs based on the target features of the caller.
2870+ LocalChange |= OptimizeNonTrivialIFuncs (M, GetTTI);
2871+
27102872 // Remove any IFuncs that are now dead.
27112873 LocalChange |= DeleteDeadIFuncs (M, NotDiscardableComdats);
27122874
0 commit comments