diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index f88d51f443bcf..4449c1e74a612 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2482,20 +2482,21 @@ DeleteDeadIFuncs(Module &M, // Follows the use-def chain of \p V backwards until it finds a Function, // in which case it collects in \p Versions. Return true on successful // use-def chain traversal, false otherwise. -static bool collectVersions(TargetTransformInfo &TTI, Value *V, - SmallVectorImpl &Versions) { +static bool +collectVersions(Value *V, SmallVectorImpl &Versions, + function_ref GetTTI) { if (auto *F = dyn_cast(V)) { - if (!TTI.isMultiversionedFunction(*F)) + if (!GetTTI(*F).isMultiversionedFunction(*F)) return false; Versions.push_back(F); } else if (auto *Sel = dyn_cast(V)) { - if (!collectVersions(TTI, Sel->getTrueValue(), Versions)) + if (!collectVersions(Sel->getTrueValue(), Versions, GetTTI)) return false; - if (!collectVersions(TTI, Sel->getFalseValue(), Versions)) + if (!collectVersions(Sel->getFalseValue(), Versions, GetTTI)) return false; } else if (auto *Phi = dyn_cast(V)) { for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) - if (!collectVersions(TTI, Phi->getIncomingValue(I), Versions)) + if (!collectVersions(Phi->getIncomingValue(I), Versions, GetTTI)) return false; } else { // Unknown instruction type. Bail. @@ -2525,8 +2526,14 @@ static bool OptimizeNonTrivialIFuncs( Module &M, function_ref GetTTI) { bool Changed = false; - // Cache containing the mask constructed from a function's target features. + // Map containing the feature bits for a given function. DenseMap FeatureMask; + // Map containing all the versions corresponding to an IFunc symbol. + DenseMap> VersionedFuncs; + // Map containing the IFunc symbol a function is version of. + DenseMap VersionOf; + // List of all the interesting IFuncs found in the module. + SmallVector IFuncs; for (GlobalIFunc &IF : M.ifuncs()) { if (IF.isInterposable()) @@ -2539,107 +2546,150 @@ static bool OptimizeNonTrivialIFuncs( if (Resolver->isInterposable()) continue; - TargetTransformInfo &TTI = GetTTI(*Resolver); - - // Discover the callee versions. - SmallVector Callees; - if (any_of(*Resolver, [&TTI, &Callees](BasicBlock &BB) { + SmallVector Versions; + // Discover the versioned functions. + if (any_of(*Resolver, [&](BasicBlock &BB) { if (auto *Ret = dyn_cast_or_null(BB.getTerminator())) - if (!collectVersions(TTI, Ret->getReturnValue(), Callees)) + if (!collectVersions(Ret->getReturnValue(), Versions, GetTTI)) return true; return false; })) continue; - if (Callees.empty()) + if (Versions.empty()) continue; - LLVM_DEBUG(dbgs() << "Statically resolving calls to function " - << Resolver->getName() << "\n"); - - // Cache the feature mask for each callee. - for (Function *Callee : Callees) { - auto [It, Inserted] = FeatureMask.try_emplace(Callee); + for (Function *V : Versions) { + VersionOf.insert({V, &IF}); + auto [It, Inserted] = FeatureMask.try_emplace(V); if (Inserted) - It->second = TTI.getFeatureMask(*Callee); + It->second = GetTTI(*V).getFeatureMask(*V); } - // Sort the callee versions in decreasing priority order. - sort(Callees, [&](auto *LHS, auto *RHS) { + // Sort function versions in decreasing priority order. + sort(Versions, [&](auto *LHS, auto *RHS) { return FeatureMask[LHS].ugt(FeatureMask[RHS]); }); - // Find the callsites and cache the feature mask for each caller. - SmallVector Callers; + IFuncs.push_back(&IF); + VersionedFuncs.try_emplace(&IF, std::move(Versions)); + } + + for (GlobalIFunc *CalleeIF : IFuncs) { + SmallVector NonFMVCallers; + SmallVector CallerIFuncs; DenseMap> CallSites; - for (User *U : IF.users()) { + + // Find the callsites. + for (User *U : CalleeIF->users()) { if (auto *CB = dyn_cast(U)) { - if (CB->getCalledOperand() == &IF) { + if (CB->getCalledOperand() == CalleeIF) { Function *Caller = CB->getFunction(); - auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller); - if (FeatInserted) - FeatIt->second = TTI.getFeatureMask(*Caller); - auto [CallIt, CallInserted] = CallSites.try_emplace(Caller); - if (CallInserted) - Callers.push_back(Caller); - CallIt->second.push_back(CB); + GlobalIFunc *CallerIF = nullptr; + TargetTransformInfo &TTI = GetTTI(*Caller); + bool CallerIsFMV = TTI.isMultiversionedFunction(*Caller); + // The caller is a version of a known IFunc. + if (auto It = VersionOf.find(Caller); It != VersionOf.end()) + CallerIF = It->second; + else if (!CallerIsFMV && OptimizeNonFMVCallers) { + // The caller is non-FMV. + auto [It, Inserted] = FeatureMask.try_emplace(Caller); + if (Inserted) + It->second = TTI.getFeatureMask(*Caller); + } else + // The caller is none of the above, skip. + continue; + auto [It, Inserted] = CallSites.try_emplace(Caller); + if (Inserted) { + if (CallerIsFMV) + CallerIFuncs.push_back(CallerIF); + else + NonFMVCallers.push_back(Caller); + } + It->second.push_back(CB); } } } - // Sort the caller versions in decreasing priority order. - sort(Callers, [&](auto *LHS, auto *RHS) { - return FeatureMask[LHS].ugt(FeatureMask[RHS]); - }); - - auto implies = [](APInt A, APInt B) { return B.isSubsetOf(A); }; - - // Index to the highest priority candidate. - unsigned I = 0; - // Now try to redirect calls starting from higher priority callers. - for (Function *Caller : Callers) { - assert(I < Callees.size() && "Found callers of equal priority"); - - Function *Callee = Callees[I]; - APInt CallerBits = FeatureMask[Caller]; - APInt CalleeBits = FeatureMask[Callee]; + LLVM_DEBUG(dbgs() << "Statically resolving calls to function " + << CalleeIF->getResolverFunction()->getName() << "\n"); + + auto redirectCalls = [&](SmallVectorImpl &Callers, + SmallVectorImpl &Callees) { + // Index to the current callee candidate. + unsigned I = 0; + // Feature bits from callers of previous iterations. + SmallVector KnownBits; + + // Try to redirect calls starting from higher priority callers. + for (Function *Caller : Callers) { + if (I == Callees.size()) + break; - // In the case of FMV callers, we know that all higher priority callers - // than the current one did not get selected at runtime, which helps - // reason about the callees (if they have versions that mandate presence - // of the features which we already know are unavailable on this target). - if (TTI.isMultiversionedFunction(*Caller)) { - // If the feature set of the caller implies the feature set of the - // highest priority candidate then it shall be picked. In case of - // identical sets advance the candidate index one position. - if (CallerBits == CalleeBits) - ++I; - else if (!implies(CallerBits, CalleeBits)) { - // Keep advancing the candidate index as long as the caller's + bool CallerIsFMV = GetTTI(*Caller).isMultiversionedFunction(*Caller); + // We can't reason much about non-FMV callers. Just pick the highest + // priority callee if it matches, otherwise bail. + if (!CallerIsFMV) + assert(I == 0 && "Should only select the highest priority candidate"); + + APInt CallerBits = FeatureMask[Caller]; + APInt CalleeBits = FeatureMask[Callees[I]]; + // In the case of FMV callers, we know that all higher priority callers + // than the current one did not get selected at runtime, which helps + // reason about the callees (if they have versions that mandate presence + // of the features which we already know are unavailable on this + // target, then we can skip over those versions/candidates). + if (CallerIsFMV) { + // Keep advancing the candidate index as long as the unavailable // features are a subset of the current candidate's. - while (implies(CalleeBits, CallerBits)) { - if (++I == Callees.size()) - break; - CalleeBits = FeatureMask[Callees[I]]; + unsigned J = 0; + while (J < KnownBits.size()) { + // Discard feature bits that are known to be available + // in the current iteration. + APInt Version = KnownBits[J] & ~CallerBits; + if (Version.isSubsetOf(CalleeBits)) { + if (++I == Callees.size()) + break; + CalleeBits = FeatureMask[Callees[I]]; + // Start over. + J = 0; + } else + ++J; + } + KnownBits.push_back(CallerBits); + } + Function *Callee = Callees[I]; + // If the feature set of the caller implies the feature set of the + // highest priority candidate then it shall be picked. + if (CalleeBits.isSubsetOf(CallerBits)) { + // If there are no records of call sites for this particular function + // version, then it is not actually a caller, in which case skip. + if (auto It = CallSites.find(Caller); It != CallSites.end()) { + for (CallBase *CS : It->second) { + LLVM_DEBUG(dbgs() << "Redirecting call " << Caller->getName() + << " -> " << Callee->getName() << "\n"); + CS->setCalledOperand(Callee); + } + Changed = true; } - continue; } - } else { - // We can't reason much about non-FMV callers. Just pick the highest - // priority callee if it matches, otherwise bail. - if (!OptimizeNonFMVCallers || I > 0 || !implies(CallerBits, CalleeBits)) - continue; - } - auto &Calls = CallSites[Caller]; - for (CallBase *CS : Calls) { - LLVM_DEBUG(dbgs() << "Redirecting call " << Caller->getName() << " -> " - << Callee->getName() << "\n"); - CS->setCalledOperand(Callee); } - Changed = true; + }; + + auto &Callees = VersionedFuncs[CalleeIF]; + + // Optimize non-FMV calls. + if (OptimizeNonFMVCallers) + redirectCalls(NonFMVCallers, Callees); + + // Optimize FMV calls. + for (GlobalIFunc *CallerIF : CallerIFuncs) { + auto &Callers = VersionedFuncs[CallerIF]; + redirectCalls(Callers, Callees); } - if (IF.use_empty() || - all_of(IF.users(), [](User *U) { return isa(U); })) + + if (CalleeIF->use_empty() || + all_of(CalleeIF->users(), [](User *U) { return isa(U); })) NumIFuncsResolved++; } return Changed; diff --git a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll index 4b6a19d3f05cf..3a6866c4e16a4 100644 --- a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll +++ b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names)" --version 4 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names|test_unrelated_callers|test_clear_known_bits)" --version 4 ; REQUIRES: aarch64-registered-target @@ -13,6 +13,16 @@ $test_caller_feats_not_implied.resolver = comdat any $test_non_fmv_caller.resolver = comdat any $test_priority.resolver = comdat any $test_alternative_names.resolver = comdat any +$test_unrelated_callers.resolver = comdat any +$test_clear_known_bits.resolver = comdat any +$caller1.resolver = comdat any +$caller2.resolver = comdat any +$caller3.resolver = comdat any +$caller6.resolver = comdat any +$caller7.resolver = comdat any +$caller8.resolver = comdat any +$caller9.resolver = comdat any +$caller11.resolver = comdat any @__aarch64_cpu_features = external local_unnamed_addr global { i64 } @@ -22,6 +32,16 @@ $test_alternative_names.resolver = comdat any @test_non_fmv_caller = weak_odr ifunc i32 (), ptr @test_non_fmv_caller.resolver @test_priority = weak_odr ifunc i32 (), ptr @test_priority.resolver @test_alternative_names = weak_odr ifunc i32 (), ptr @test_alternative_names.resolver +@test_unrelated_callers = weak_odr ifunc i32 (), ptr @test_unrelated_callers.resolver +@test_clear_known_bits = weak_odr ifunc i32 (), ptr @test_clear_known_bits.resolver +@caller1 = weak_odr ifunc i32 (), ptr @caller1.resolver +@caller2 = weak_odr ifunc i32 (), ptr @caller2.resolver +@caller3 = weak_odr ifunc i32 (), ptr @caller3.resolver +@caller6 = weak_odr ifunc i32 (), ptr @caller6.resolver +@caller7 = weak_odr ifunc i32 (), ptr @caller7.resolver +@caller8 = weak_odr ifunc i32 (), ptr @caller8.resolver +@caller9 = weak_odr ifunc i32 (), ptr @caller9.resolver +@caller11 = weak_odr ifunc i32 (), ptr @caller11.resolver declare void @__init_cpu_features_resolver() local_unnamed_addr @@ -34,18 +54,18 @@ define weak_odr ptr @test_single_bb_resolver.resolver() comdat { resolver_entry: tail call void @__init_cpu_features_resolver() %0 = load i64, ptr @__aarch64_cpu_features, align 8 - %1 = and i64 %0, 68719476736 - %.not = icmp eq i64 %1, 0 - %2 = and i64 %0, 1073741824 - %.not3 = icmp eq i64 %2, 0 - %test_single_bb_resolver._Msve.test_single_bb_resolver.default = select i1 %.not3, ptr @test_single_bb_resolver.default, ptr @test_single_bb_resolver._Msve - %common.ret.op = select i1 %.not, ptr %test_single_bb_resolver._Msve.test_single_bb_resolver.default, ptr @test_single_bb_resolver._Msve2 + %1 = and i64 %0, 69793284352 + %2 = icmp eq i64 %1, 69793284352 + %3 = and i64 %0, 1073807616 + %4 = icmp eq i64 %3, 1073807616 + %test_single_bb_resolver._Msve.test_single_bb_resolver.default = select i1 %4, ptr @test_single_bb_resolver._Msve, ptr @test_single_bb_resolver.default + %common.ret.op = select i1 %2, ptr @test_single_bb_resolver._Msve2, ptr %test_single_bb_resolver._Msve.test_single_bb_resolver.default ret ptr %common.ret.op } define i32 @caller1._Msve() #1 { ; CHECK-LABEL: define i32 @caller1._Msve( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver._Msve() ; entry: @@ -55,7 +75,7 @@ entry: define i32 @caller1._Msve2() #2 { ; CHECK-LABEL: define i32 @caller1._Msve2( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver._Msve2() ; entry: @@ -65,7 +85,7 @@ entry: define i32 @caller1.default() #0 { ; CHECK-LABEL: define i32 @caller1.default( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver.default() ; entry: @@ -73,6 +93,20 @@ entry: ret i32 %call } +define weak_odr ptr @caller1.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller1.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 69793284352 + %2 = icmp eq i64 %1, 69793284352 + %3 = and i64 %0, 1073807616 + %4 = icmp eq i64 %3, 1073807616 + %caller1._Msve.caller1.default = select i1 %4, ptr @caller1._Msve, ptr @caller1.default + %common.ret.op = select i1 %2, ptr @caller1._Msve2, ptr %caller1._Msve.caller1.default + ret ptr %common.ret.op +} + declare i32 @test_multi_bb_resolver._Mmops() #3 declare i32 @test_multi_bb_resolver._Msve2() #2 declare i32 @test_multi_bb_resolver._Msve() #1 @@ -92,20 +126,20 @@ common.ret: ; preds = %resolver_else2, %re ret ptr %common.ret.op resolver_else: ; preds = %resolver_entry - %2 = and i64 %0, 68719476736 - %.not5 = icmp eq i64 %2, 0 - br i1 %.not5, label %resolver_else2, label %common.ret + %2 = and i64 %0, 69793284352 + %3 = icmp eq i64 %2, 69793284352 + br i1 %3, label %common.ret, label %resolver_else2 resolver_else2: ; preds = %resolver_else - %3 = and i64 %0, 1073741824 - %.not6 = icmp eq i64 %3, 0 - %test_multi_bb_resolver._Msve.test_multi_bb_resolver.default = select i1 %.not6, ptr @test_multi_bb_resolver.default, ptr @test_multi_bb_resolver._Msve + %4 = and i64 %0, 1073807616 + %5 = icmp eq i64 %4, 1073807616 + %test_multi_bb_resolver._Msve.test_multi_bb_resolver.default = select i1 %5, ptr @test_multi_bb_resolver._Msve, ptr @test_multi_bb_resolver.default br label %common.ret } define i32 @caller2._MmopsMsve2() #4 { ; CHECK-LABEL: define i32 @caller2._MmopsMsve2( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR4:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver._Mmops() ; entry: @@ -115,7 +149,7 @@ entry: define i32 @caller2._Mmops() #3 { ; CHECK-LABEL: define i32 @caller2._Mmops( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver._Mmops() ; entry: @@ -125,7 +159,7 @@ entry: define i32 @caller2._Msve() #1 { ; CHECK-LABEL: define i32 @caller2._Msve( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR1]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver() ; entry: @@ -135,7 +169,7 @@ entry: define i32 @caller2.default() #0 { ; CHECK-LABEL: define i32 @caller2.default( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver.default() ; entry: @@ -143,6 +177,31 @@ entry: ret i32 %call } +define weak_odr ptr @caller2.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller2.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460822096707840 + %2 = icmp eq i64 %1, 576460822096707840 + br i1 %2, label %common.ret, label %resolver_else + +common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry + %common.ret.op = phi ptr [ @caller2._MmopsMsve2, %resolver_entry ], [ @caller2._Mmops, %resolver_else ], [ %caller2._Msve.caller2.default, %resolver_else2 ] + ret ptr %common.ret.op + +resolver_else: ; preds = %resolver_entry + %3 = and i64 %0, 576460752303423488 + %.not = icmp eq i64 %3, 0 + br i1 %.not, label %resolver_else2, label %common.ret + +resolver_else2: ; preds = %resolver_else + %4 = and i64 %0, 1073807616 + %5 = icmp eq i64 %4, 1073807616 + %caller2._Msve.caller2.default = select i1 %5, ptr @caller2._Msve, ptr @caller2.default + br label %common.ret +} + declare i32 @test_caller_feats_not_implied._Mmops() #3 declare i32 @test_caller_feats_not_implied._Msme() #5 declare i32 @test_caller_feats_not_implied._Msve() #1 @@ -162,20 +221,20 @@ common.ret: ; preds = %resolver_else2, %re ret ptr %common.ret.op resolver_else: ; preds = %resolver_entry - %2 = and i64 %0, 4398046511104 - %.not5 = icmp eq i64 %2, 0 - br i1 %.not5, label %resolver_else2, label %common.ret + %2 = and i64 %0, 4398180795136 + %3 = icmp eq i64 %2, 4398180795136 + br i1 %3, label %common.ret, label %resolver_else2 resolver_else2: ; preds = %resolver_else - %3 = and i64 %0, 1073741824 - %.not6 = icmp eq i64 %3, 0 - %test_caller_feats_not_implied._Msve.test_caller_feats_not_implied.default = select i1 %.not6, ptr @test_caller_feats_not_implied.default, ptr @test_caller_feats_not_implied._Msve + %4 = and i64 %0, 1073807616 + %5 = icmp eq i64 %4, 1073807616 + %test_caller_feats_not_implied._Msve.test_caller_feats_not_implied.default = select i1 %5, ptr @test_caller_feats_not_implied._Msve, ptr @test_caller_feats_not_implied.default br label %common.ret } define i32 @caller3._Mmops() #3 { ; CHECK-LABEL: define i32 @caller3._Mmops( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: ) #[[ATTR3]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied._Mmops() ; entry: @@ -185,7 +244,7 @@ entry: define i32 @caller3._Msve() #1 { ; CHECK-LABEL: define i32 @caller3._Msve( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] { +; CHECK-SAME: ) #[[ATTR1]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied() ; entry: @@ -195,7 +254,7 @@ entry: define i32 @caller3.default() #0 { ; CHECK-LABEL: define i32 @caller3.default( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied() ; entry: @@ -203,6 +262,20 @@ entry: ret i32 %call } +define weak_odr ptr @caller3.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller3.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460752303423488 + %.not = icmp eq i64 %1, 0 + %2 = and i64 %0, 1073807616 + %3 = icmp eq i64 %2, 1073807616 + %caller3._Msve.caller3.default = select i1 %3, ptr @caller3._Msve, ptr @caller3.default + %common.ret.op = select i1 %.not, ptr %caller3._Msve.caller3.default, ptr @caller3._Mmops + ret ptr %common.ret.op +} + declare i32 @test_non_fmv_caller._Maes() #6 declare i32 @test_non_fmv_caller._Msm4() #7 declare i32 @test_non_fmv_caller.default() #0 @@ -212,15 +285,18 @@ define weak_odr ptr @test_non_fmv_caller.resolver() comdat { resolver_entry: tail call void @__init_cpu_features_resolver() %0 = load i64, ptr @__aarch64_cpu_features, align 8 - %1 = and i64 %0, 32768 - %.not = icmp eq i64 %1, 0 - %test_non_fmv_caller._Maes.test_non_fmv_caller.default = select i1 %.not, ptr @test_non_fmv_caller.default, ptr @test_non_fmv_caller._Maes - ret ptr %test_non_fmv_caller._Maes.test_non_fmv_caller.default + %1 = and i64 %0, 33536 + %2 = icmp eq i64 %1, 33536 + %3 = and i64 %0, 800 + %4 = icmp eq i64 %3, 800 + %test_non_fmv_caller._Msm4.test_non_fmv_caller.default = select i1 %4, ptr @test_non_fmv_caller._Msm4, ptr @test_non_fmv_caller.default + %common.ret.op = select i1 %2, ptr @test_non_fmv_caller._Maes, ptr %test_non_fmv_caller._Msm4.test_non_fmv_caller.default + ret ptr %common.ret.op } define i32 @caller4() #8 { ; CHECK-LABEL: define i32 @caller4( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR7:[0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR8:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller._Maes() ; entry: @@ -230,7 +306,7 @@ entry: define i32 @caller5() #9 { ; CHECK-LABEL: define i32 @caller5( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR8:[0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR9:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller() ; entry: @@ -239,7 +315,7 @@ entry: } declare i32 @test_priority._Msve2-sha3() #10 -declare i32 @test_priority._Mls64Mssbs() #11 +declare i32 @test_priority._McsscMssbs() #11 declare i32 @test_priority._MflagmMlseMrng() #12 declare i32 @test_priority.default() #0 @@ -248,36 +324,57 @@ define weak_odr ptr @test_priority.resolver() comdat { resolver_entry: tail call void @__init_cpu_features_resolver() %0 = load i64, ptr @__aarch64_cpu_features, align 8 - %1 = and i64 %0, 131 - %2 = icmp eq i64 %1, 131 + %1 = and i64 %0, 562949953423360 + %2 = icmp eq i64 %1, 562949953423360 br i1 %2, label %common.ret, label %resolver_else common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry - %common.ret.op = phi ptr [ @test_priority._MflagmMlseMrng, %resolver_entry ], [ @test_priority._Mls64Mssbs, %resolver_else ], [ %test_priority._Msve2-sha3.test_priority.default, %resolver_else2 ] + %common.ret.op = phi ptr [ @test_priority._McsscMssbs, %resolver_entry ], [ @test_priority._Msve2-sha3, %resolver_else ], [ %test_priority._MflagmMlseMrng.test_priority.default, %resolver_else2 ] ret ptr %common.ret.op resolver_else: ; preds = %resolver_entry - %3 = and i64 %0, 9570149208162304 - %4 = icmp eq i64 %3, 9570149208162304 + %3 = and i64 %0, 1169304924928 + %4 = icmp eq i64 %3, 1169304924928 br i1 %4, label %common.ret, label %resolver_else2 resolver_else2: ; preds = %resolver_else - %5 = and i64 %0, 1099511627776 - %.not = icmp eq i64 %5, 0 - %test_priority._Msve2-sha3.test_priority.default = select i1 %.not, ptr @test_priority.default, ptr @test_priority._Msve2-sha3 + %5 = and i64 %0, 131 + %6 = icmp eq i64 %5, 131 + %test_priority._MflagmMlseMrng.test_priority.default = select i1 %6, ptr @test_priority._MflagmMlseMrng, ptr @test_priority.default br label %common.ret } -define i32 @caller6._MflagmMls64MlseMrngMssbsMsve2-sha3() #13 { -; CHECK-LABEL: define i32 @caller6._MflagmMls64MlseMrngMssbsMsve2-sha3( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR12:[0-9]+]] { -; CHECK: [[CALL:%.*]] = tail call i32 @test_priority._Mls64Mssbs() +define i32 @caller6._McsscMflagmMlseMrngMssbsMsve2-sha3() #13 { +; CHECK-LABEL: define i32 @caller6._McsscMflagmMlseMrngMssbsMsve2-sha3( +; CHECK-SAME: ) #[[ATTR13:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_priority._McsscMssbs() ; entry: %call = tail call i32 @test_priority() ret i32 %call } +define i32 @caller6.default() #0 { +; CHECK-LABEL: define i32 @caller6.default( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_priority() +; +entry: + %call = tail call i32 @test_priority() + ret i32 %call +} + +define weak_odr ptr @caller6.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller6.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 564119258348419 + %2 = icmp eq i64 %1, 564119258348419 + %caller6._McsscMflagmMlseMrngMssbsMsve2-sha3.caller6.default = select i1 %2, ptr @caller6._McsscMflagmMlseMrngMssbsMsve2-sha3, ptr @caller6.default + ret ptr %caller6._McsscMflagmMlseMrngMssbsMsve2-sha3.caller6.default +} + declare i32 @test_alternative_names._Mdpb2Mfrintts() #14 declare i32 @test_alternative_names._Mflagm2Mfrintts() #15 declare i32 @test_alternative_names._Mrcpc2() #16 @@ -310,7 +407,7 @@ resolver_else2: ; preds = %resolver_else define i32 @caller7._Mdpb2Mfrintts() #14 { ; CHECK-LABEL: define i32 @caller7._Mdpb2Mfrintts( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR13:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR14:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names._Mdpb2Mfrintts() ; entry: @@ -320,7 +417,7 @@ entry: define i32 @caller7._Mfrintts() #17 { ; CHECK-LABEL: define i32 @caller7._Mfrintts( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR16:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR17:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names() ; entry: @@ -330,7 +427,7 @@ entry: define i32 @caller7._Mrcpc2() #16 { ; CHECK-LABEL: define i32 @caller7._Mrcpc2( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR15:[0-9]+]] { +; CHECK-SAME: ) #[[ATTR16:[0-9]+]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names._Mrcpc2() ; entry: @@ -340,7 +437,7 @@ entry: define i32 @caller7.default() #0 { ; CHECK-LABEL: define i32 @caller7.default( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ) #[[ATTR0]] { ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names.default() ; entry: @@ -348,6 +445,239 @@ entry: ret i32 %call } +define weak_odr ptr @caller7.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller7.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 17563904 + %2 = icmp eq i64 %1, 17563904 + br i1 %2, label %common.ret, label %resolver_else + +common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry + %common.ret.op = phi ptr [ @caller7._Mdpb2Mfrintts, %resolver_entry ], [ @caller7._Mfrintts, %resolver_else ], [ %caller7._Mrcpc2.caller7.default, %resolver_else2 ] + ret ptr %common.ret.op + +resolver_else: ; preds = %resolver_entry + %3 = and i64 %0, 16777472 + %4 = icmp eq i64 %3, 16777472 + br i1 %4, label %common.ret, label %resolver_else2 + +resolver_else2: ; preds = %resolver_else + %5 = and i64 %0, 12582912 + %6 = icmp eq i64 %5, 12582912 + %caller7._Mrcpc2.caller7.default = select i1 %6, ptr @caller7._Mrcpc2, ptr @caller7.default + br label %common.ret +} + +declare i32 @test_unrelated_callers._Mmops() #3 +declare i32 @test_unrelated_callers._Msve2() #2 +declare i32 @test_unrelated_callers._Msve() #1 +declare i32 @test_unrelated_callers.default() #0 + +define weak_odr ptr @test_unrelated_callers.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @test_unrelated_callers.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460752303423488 + %.not = icmp eq i64 %1, 0 + br i1 %.not, label %resolver_else, label %common.ret + +common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry + %common.ret.op = phi ptr [ @test_unrelated_callers._Mmops, %resolver_entry ], [ @test_unrelated_callers._Msve2, %resolver_else ], [ %test_unrelated_callers._Msve.test_unrelated_callers.default, %resolver_else2 ] + ret ptr %common.ret.op + +resolver_else: ; preds = %resolver_entry + %2 = and i64 %0, 69793284352 + %3 = icmp eq i64 %2, 69793284352 + br i1 %3, label %common.ret, label %resolver_else2 + +resolver_else2: ; preds = %resolver_else + %4 = and i64 %0, 1073807616 + %5 = icmp eq i64 %4, 1073807616 + %test_unrelated_callers._Msve.test_unrelated_callers.default = select i1 %5, ptr @test_unrelated_callers._Msve, ptr @test_unrelated_callers.default + br label %common.ret +} + +define i32 @caller8._MmopsMsve2() #4 { +; CHECK-LABEL: define i32 @caller8._MmopsMsve2( +; CHECK-SAME: ) #[[ATTR4]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Mmops() +; +entry: + %call = tail call i32 @test_unrelated_callers() + ret i32 %call +} + +define dso_local i32 @caller8._Msve2() #2 { +; CHECK-LABEL: define dso_local i32 @caller8._Msve2( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Msve2() +; +entry: + %call = tail call i32 @test_unrelated_callers() + ret i32 %call +} + +define i32 @caller8.default() #0 { +; CHECK-LABEL: define i32 @caller8.default( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers() +; +entry: + %call = tail call i32 @test_unrelated_callers() + ret i32 %call +} + +define weak_odr ptr @caller8.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller8.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460822096707840 + %2 = icmp eq i64 %1, 576460822096707840 + %3 = and i64 %0, 69793284352 + %4 = icmp eq i64 %3, 69793284352 + %caller8._Msve2.caller8.default = select i1 %4, ptr @caller8._Msve2, ptr @caller8.default + %common.ret.op = select i1 %2, ptr @caller8._MmopsMsve2, ptr %caller8._Msve2.caller8.default + ret ptr %common.ret.op +} + +define i32 @caller9._Mmops() #3 { +; CHECK-LABEL: define i32 @caller9._Mmops( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Mmops() +; +entry: + %call = tail call i32 @test_unrelated_callers() + ret i32 %call +} + +define i32 @caller9._Msve() #1 { +; CHECK-LABEL: define i32 @caller9._Msve( +; CHECK-SAME: ) #[[ATTR1]] { +entry: + ret i32 1 +} + +define i32 @caller9.default() #0 { +; CHECK-LABEL: define i32 @caller9.default( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers.default() +; +entry: + %call = tail call i32 @test_unrelated_callers() + ret i32 %call +} + +define weak_odr ptr @caller9.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller9.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460752303423488 + %.not = icmp eq i64 %1, 0 + %2 = and i64 %0, 1073807616 + %3 = icmp eq i64 %2, 1073807616 + %caller9._Msve.caller9.default = select i1 %3, ptr @caller9._Msve, ptr @caller9.default + %common.ret.op = select i1 %.not, ptr %caller9._Msve.caller9.default, ptr @caller9._Mmops + ret ptr %common.ret.op +} + +define i32 @caller10() #18 { +; CHECK-LABEL: define i32 @caller10( +; CHECK-SAME: ) local_unnamed_addr #[[ATTR18:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Mmops() +; +entry: + %call = tail call i32 @test_unrelated_callers() + ret i32 %call +} + +declare i32 @test_clear_known_bits._Mmops() #3 +declare i32 @test_clear_known_bits._Maes() #6 +declare i32 @test_clear_known_bits.default() #0 + +define weak_odr ptr @test_clear_known_bits.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @test_clear_known_bits.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460752303423488 + %.not = icmp eq i64 %1, 0 + %2 = and i64 %0, 33536 + %3 = icmp eq i64 %2, 33536 + %test_clear_known_bits._Maes.test_clear_known_bits.default = select i1 %3, ptr @test_clear_known_bits._Maes, ptr @test_clear_known_bits.default + %common.ret.op = select i1 %.not, ptr %test_clear_known_bits._Maes.test_clear_known_bits.default, ptr @test_clear_known_bits._Mmops + ret ptr %common.ret.op +} + +define i32 @caller11._MmopsMsve2() #4 { +; CHECK-LABEL: define i32 @caller11._MmopsMsve2( +; CHECK-SAME: ) #[[ATTR4]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_clear_known_bits._Mmops() +; +entry: + %call = tail call i32 @test_clear_known_bits() + ret i32 %call +} + +define i32 @caller11._Msme() #5 { +; CHECK-LABEL: define i32 @caller11._Msme( +; CHECK-SAME: ) #[[ATTR5:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_clear_known_bits() +; +entry: + %call = tail call i32 @test_clear_known_bits() + ret i32 %call +} + +define noundef i32 @caller11._MaesMsve2() #19 { +; CHECK-LABEL: define noundef i32 @caller11._MaesMsve2( +; CHECK-SAME: ) #[[ATTR19:[0-9]+]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_clear_known_bits._Maes() +; +entry: + %call = tail call i32 @test_clear_known_bits() + ret i32 %call +} + +define i32 @caller11.default() #0 { +; CHECK-LABEL: define i32 @caller11.default( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK: [[CALL:%.*]] = tail call i32 @test_clear_known_bits() +; +entry: + %call = tail call i32 @test_clear_known_bits() + ret i32 %call +} + +define weak_odr ptr @caller11.resolver() comdat { +; CHECK-LABEL: define weak_odr ptr @caller11.resolver() comdat { +resolver_entry: + tail call void @__init_cpu_features_resolver() + %0 = load i64, ptr @__aarch64_cpu_features, align 8 + %1 = and i64 %0, 576460822096707840 + %2 = icmp eq i64 %1, 576460822096707840 + br i1 %2, label %common.ret, label %resolver_else + +common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry + %common.ret.op = phi ptr [ @caller11._MmopsMsve2, %resolver_entry ], [ @caller11._Msme, %resolver_else ], [ %caller11._MaesMsve2.caller11.default, %resolver_else2 ] + ret ptr %common.ret.op + +resolver_else: ; preds = %resolver_entry + %3 = and i64 %0, 4398180795136 + %4 = icmp eq i64 %3, 4398180795136 + br i1 %4, label %common.ret, label %resolver_else2 + +resolver_else2: ; preds = %resolver_else + %5 = and i64 %0, 69793317632 + %6 = icmp eq i64 %5, 69793317632 + %caller11._MaesMsve2.caller11.default = select i1 %6, ptr @caller11._MaesMsve2, ptr @caller11.default + br label %common.ret +} + attributes #0 = { "fmv-features" } attributes #1 = { "fmv-features"="sve" } attributes #2 = { "fmv-features"="sve2" } @@ -359,10 +689,12 @@ attributes #7 = { "fmv-features"="sm4" } attributes #8 = { "target-features"="+aes,+fp-armv8,+neon,+outline-atomics,+v8a" } attributes #9 = { "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,+sm4" } attributes #10 = { "fmv-features"="sve2-sha3" } -attributes #11 = { "fmv-features"="ls64,ssbs" } +attributes #11 = { "fmv-features"="cssc,ssbs" } attributes #12 = { "fmv-features"="flagm,lse,rng" } -attributes #13 = { "fmv-features"="flagm,ls64,lse,rng,ssbs,sve2-sha3" } +attributes #13 = { "fmv-features"="cssc,flagm,lse,rng,ssbs,sve2-sha3" } attributes #14 = { "fmv-features"="dpb2,frintts" } attributes #15 = { "fmv-features"="flagm2,frintts" } attributes #16 = { "fmv-features"="rcpc2" } attributes #17 = { "fmv-features"="frintts" } +attributes #18 = { "target-features"="+fp-armv8,+mops,+neon,+outline-atomics,+sve,+v8a" } +attributes #19 = { "fmv-features"="aes,sve2" }