Skip to content

Commit c5e6d92

Browse files
committed
Adds support for static resolution of calls with explicit version priority.
1 parent c5bd398 commit c5e6d92

File tree

8 files changed

+246
-20
lines changed

8 files changed

+246
-20
lines changed
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O3 -fno-inline -emit-llvm -o - %s | FileCheck %s
3+
// Callee whose non-default versions all carry an explicit priority. The CHECK
// lines for @foo.resolver below expect the versions to be tried in the order
// lse, sve2, sve and finally default.
4+
__attribute__((target_version("priority1+lse"))) int foo(void) { return 1; }
5+
__attribute__((target_version("priority2+sve2"))) int foo(void) { return 2; }
6+
__attribute__((target_version("priority3+sve"))) int foo(void) { return 3; }
7+
__attribute__((target_version( "default"))) int foo(void) { return 0; }
8+
// Multiversioned caller. Expected callees per the CHECK lines below:
//   fmv_caller._MlseMsve2 -> foo._Mlse
//   fmv_caller._Mlse      -> foo._Mlse
//   fmv_caller._Msve      -> foo (the call through the ifunc is kept)
//   fmv_caller.default    -> foo.default
9+
__attribute__((target_clones("priority1+lse+sve2", "priority2+lse", "priority3+sve", "default")))
10+
int fmv_caller(void) { return foo(); }
11+
12+
// Callee mixing a version without an explicit priority ("aes") with one that
// has it ("priority1+sm4"). The CHECK lines for @bar.resolver below expect the
// sm4 version to be selected ahead of the aes version.
13+
__attribute__((target_version("aes"))) int bar(void) { return 1; }
14+
__attribute__((target_version("priority1+sm4"))) int bar(void) { return 2; }
15+
__attribute__((target_version("default"))) int bar(void) { return 0; }
16+
// Callers that use a plain target attribute. Per the CHECK lines below, the
// call in regular_caller_aes remains a call to the bar ifunc, while
// regular_caller_sm4 calls bar._Msm4 directly.
17+
__attribute__((target("aes"))) int regular_caller_aes() { return bar(); }
18+
__attribute__((target("sm4"))) int regular_caller_sm4() { return bar(); }
19+
//.
20+
// CHECK: @__aarch64_cpu_features = external dso_local local_unnamed_addr global { i64 }
21+
// CHECK: @foo = weak_odr ifunc i32 (), ptr @foo.resolver
22+
// CHECK: @fmv_caller = weak_odr ifunc i32 (), ptr @fmv_caller.resolver
23+
// CHECK: @bar = weak_odr ifunc i32 (), ptr @bar.resolver
24+
//.
25+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
26+
// CHECK-LABEL: define {{[^@]+}}@foo._Mlse
27+
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
28+
// CHECK-NEXT: entry:
29+
// CHECK-NEXT: ret i32 1
30+
//
31+
//
32+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
33+
// CHECK-LABEL: define {{[^@]+}}@foo._Msve2
34+
// CHECK-SAME: () #[[ATTR1:[0-9]+]] {
35+
// CHECK-NEXT: entry:
36+
// CHECK-NEXT: ret i32 2
37+
//
38+
//
39+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
40+
// CHECK-LABEL: define {{[^@]+}}@foo._Msve
41+
// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
42+
// CHECK-NEXT: entry:
43+
// CHECK-NEXT: ret i32 3
44+
//
45+
//
46+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
47+
// CHECK-LABEL: define {{[^@]+}}@foo.default
48+
// CHECK-SAME: () #[[ATTR3:[0-9]+]] {
49+
// CHECK-NEXT: entry:
50+
// CHECK-NEXT: ret i32 0
51+
//
52+
//
53+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
54+
// CHECK-LABEL: define {{[^@]+}}@fmv_caller._MlseMsve2
55+
// CHECK-SAME: () #[[ATTR4:[0-9]+]] {
56+
// CHECK-NEXT: entry:
57+
// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse()
58+
// CHECK-NEXT: ret i32 [[CALL]]
59+
//
60+
//
61+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
62+
// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Mlse
63+
// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
64+
// CHECK-NEXT: entry:
65+
// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo._Mlse()
66+
// CHECK-NEXT: ret i32 [[CALL]]
67+
//
68+
//
69+
// CHECK: Function Attrs: noinline nounwind vscale_range(1,16)
70+
// CHECK-LABEL: define {{[^@]+}}@fmv_caller._Msve
71+
// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
72+
// CHECK-NEXT: entry:
73+
// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo() #[[ATTR12:[0-9]+]]
74+
// CHECK-NEXT: ret i32 [[CALL]]
75+
//
76+
//
77+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16)
78+
// CHECK-LABEL: define {{[^@]+}}@fmv_caller.default
79+
// CHECK-SAME: () #[[ATTR7:[0-9]+]] {
80+
// CHECK-NEXT: entry:
81+
// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @foo.default()
82+
// CHECK-NEXT: ret i32 [[CALL]]
83+
//
84+
//
85+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
86+
// CHECK-LABEL: define {{[^@]+}}@bar._Maes
87+
// CHECK-SAME: () #[[ATTR8:[0-9]+]] {
88+
// CHECK-NEXT: entry:
89+
// CHECK-NEXT: ret i32 1
90+
//
91+
//
92+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
93+
// CHECK-LABEL: define {{[^@]+}}@bar._Msm4
94+
// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
95+
// CHECK-NEXT: entry:
96+
// CHECK-NEXT: ret i32 2
97+
//
98+
//
99+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
100+
// CHECK-LABEL: define {{[^@]+}}@bar.default
101+
// CHECK-SAME: () #[[ATTR3]] {
102+
// CHECK-NEXT: entry:
103+
// CHECK-NEXT: ret i32 0
104+
//
105+
//
106+
// CHECK: Function Attrs: noinline nounwind
107+
// CHECK-LABEL: define {{[^@]+}}@regular_caller_aes
108+
// CHECK-SAME: () local_unnamed_addr #[[ATTR10:[0-9]+]] {
109+
// CHECK-NEXT: entry:
110+
// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar() #[[ATTR12]]
111+
// CHECK-NEXT: ret i32 [[CALL]]
112+
//
113+
//
114+
// CHECK: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none)
115+
// CHECK-LABEL: define {{[^@]+}}@regular_caller_sm4
116+
// CHECK-SAME: () local_unnamed_addr #[[ATTR11:[0-9]+]] {
117+
// CHECK-NEXT: entry:
118+
// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @bar._Msm4()
119+
// CHECK-NEXT: ret i32 [[CALL]]
120+
//
121+
//
122+
// CHECK-LABEL: define {{[^@]+}}@foo.resolver() comdat {
123+
// CHECK-NEXT: resolver_entry:
124+
// CHECK-NEXT: tail call void @__init_cpu_features_resolver()
125+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
126+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 128
127+
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
128+
// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE:%.*]], label [[COMMON_RET:%.*]]
129+
// CHECK: common.ret:
130+
// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @foo._Mlse, [[RESOLVER_ENTRY:%.*]] ], [ @foo._Msve2, [[RESOLVER_ELSE]] ], [ [[FOO__MSVE_FOO_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ]
131+
// CHECK-NEXT: ret ptr [[COMMON_RET_OP]]
132+
// CHECK: resolver_else:
133+
// CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP0]], 69793284352
134+
// CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 69793284352
135+
// CHECK-NEXT: br i1 [[TMP3]], label [[COMMON_RET]], label [[RESOLVER_ELSE2]]
136+
// CHECK: resolver_else2:
137+
// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616
138+
// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616
139+
// CHECK-NEXT: [[FOO__MSVE_FOO_DEFAULT]] = select i1 [[TMP5]], ptr @foo._Msve, ptr @foo.default
140+
// CHECK-NEXT: br label [[COMMON_RET]]
141+
//
142+
//
143+
// CHECK-LABEL: define {{[^@]+}}@fmv_caller.resolver() comdat {
144+
// CHECK-NEXT: resolver_entry:
145+
// CHECK-NEXT: tail call void @__init_cpu_features_resolver()
146+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
147+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 69793284480
148+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 69793284480
149+
// CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[RESOLVER_ELSE:%.*]]
150+
// CHECK: common.ret:
151+
// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi ptr [ @fmv_caller._MlseMsve2, [[RESOLVER_ENTRY:%.*]] ], [ @fmv_caller._Mlse, [[RESOLVER_ELSE]] ], [ [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT:%.*]], [[RESOLVER_ELSE2:%.*]] ]
152+
// CHECK-NEXT: ret ptr [[COMMON_RET_OP]]
153+
// CHECK: resolver_else:
154+
// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 128
155+
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[TMP3]], 0
156+
// CHECK-NEXT: br i1 [[DOTNOT]], label [[RESOLVER_ELSE2]], label [[COMMON_RET]]
157+
// CHECK: resolver_else2:
158+
// CHECK-NEXT: [[TMP4:%.*]] = and i64 [[TMP0]], 1073807616
159+
// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[TMP4]], 1073807616
160+
// CHECK-NEXT: [[FMV_CALLER__MSVE_FMV_CALLER_DEFAULT]] = select i1 [[TMP5]], ptr @fmv_caller._Msve, ptr @fmv_caller.default
161+
// CHECK-NEXT: br label [[COMMON_RET]]
162+
//
163+
//
164+
// CHECK-LABEL: define {{[^@]+}}@bar.resolver() comdat {
165+
// CHECK-NEXT: resolver_entry:
166+
// CHECK-NEXT: tail call void @__init_cpu_features_resolver()
167+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
168+
// CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 800
169+
// CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 800
170+
// CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP0]], 33536
171+
// CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 33536
172+
// CHECK-NEXT: [[BAR__MAES_BAR_DEFAULT:%.*]] = select i1 [[TMP4]], ptr @bar._Maes, ptr @bar.default
173+
// CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[TMP2]], ptr @bar._Msm4, ptr [[BAR__MAES_BAR_DEFAULT]]
174+
// CHECK-NEXT: ret ptr [[COMMON_RET_OP]]
175+
//
176+
//.
177+
// CHECK: attributes #[[ATTR0]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="lse,priority1" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" }
178+
// CHECK: attributes #[[ATTR1]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="priority2,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve,+sve2" }
179+
// CHECK: attributes #[[ATTR2]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="priority3,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
180+
// CHECK: attributes #[[ATTR3]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
181+
// CHECK: attributes #[[ATTR4]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="lse,priority1,sve2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+lse,+sve,+sve2" }
182+
// CHECK: attributes #[[ATTR5]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features"="lse,priority2" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse" }
183+
// CHECK: attributes #[[ATTR6]] = { noinline nounwind vscale_range(1,16) "fmv-features"="priority3,sve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" }
184+
// CHECK: attributes #[[ATTR7]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) vscale_range(1,16) "fmv-features" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
185+
// CHECK: attributes #[[ATTR8]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="aes" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" }
186+
// CHECK: attributes #[[ATTR9]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "fmv-features"="priority1,sm4" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" }
187+
// CHECK: attributes #[[ATTR10]] = { noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+fp-armv8,+neon" }
188+
// CHECK: attributes #[[ATTR11]] = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4" }
189+
// CHECK: attributes #[[ATTR12]] = { nounwind }
190+
//.
191+
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
192+
// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
193+
//.

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1916,9 +1916,13 @@ class TargetTransformInfo {
19161916
LLVM_ABI bool hasArmWideBranch(bool Thumb) const;
19171917

19181918
/// Returns a bitmask constructed from the target-features or fmv-features
1919-
/// metadata of a function.
1919+
/// metadata of a function corresponding to its Arch Extensions.
19201920
LLVM_ABI uint64_t getFeatureMask(const Function &F) const;
19211921

1922+
/// Returns a bitmask constructed from the target-features or fmv-features
1923+
/// metadata of a function corresponding to its FMV priority.
1924+
LLVM_ABI uint64_t getPriorityMask(const Function &F) const;
1925+
19221926
/// Returns true if this is an instance of a function with multiple versions.
19231927
LLVM_ABI bool isMultiversionedFunction(const Function &F) const;
19241928

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,7 @@ class TargetTransformInfoImplBase {
11261126
virtual bool hasArmWideBranch(bool) const { return false; }
11271127

11281128
virtual uint64_t getFeatureMask(const Function &F) const { return 0; }
1129+
virtual uint64_t getPriorityMask(const Function &F) const { return 0; }
11291130

11301131
virtual bool isMultiversionedFunction(const Function &F) const {
11311132
return false;

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,6 +1426,10 @@ uint64_t TargetTransformInfo::getFeatureMask(const Function &F) const {
14261426
return TTIImpl->getFeatureMask(F);
14271427
}
14281428

// Thin wrapper: forwards the query for F to TTIImpl->getPriorityMask.
1429+
uint64_t TargetTransformInfo::getPriorityMask(const Function &F) const {
1430+
return TTIImpl->getPriorityMask(F);
1431+
}
1432+
14291433
bool TargetTransformInfo::isMultiversionedFunction(const Function &F) const {
14301434
return TTIImpl->isMultiversionedFunction(F);
14311435
}

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,12 +249,23 @@ static bool hasPossibleIncompatibleOps(const Function *F) {
249249
return false;
250250
}
251251

252-
uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const {
// Splits the comma-separated value of one of F's string attributes into
// Features. The attribute read is "fmv-features" when
// TTI->isMultiversionedFunction(F) is true and "target-features" otherwise.
// TTI is dereferenced unconditionally, so it must be non-null.
252+
static void extractAttrFeatures(const Function &F, const AArch64TTIImpl *TTI,
253+
SmallVectorImpl<StringRef> &Features) {
253254
StringRef AttributeStr =
254-
isMultiversionedFunction(F) ? "fmv-features" : "target-features";
255+
TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features";
255256
StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
256-
SmallVector<StringRef, 8> Features;
257257
FeatureStr.split(Features, ",");
258+
}
259+
// Collects F's feature strings with extractAttrFeatures and returns the mask
// that AArch64::getCpuSupportsMask computes from them.
260+
uint64_t AArch64TTIImpl::getFeatureMask(const Function &F) const {
261+
SmallVector<StringRef, 8> Features;
262+
extractAttrFeatures(F, this, Features);
263+
return AArch64::getCpuSupportsMask(Features);
264+
}
265+
// Collects F's feature strings with extractAttrFeatures and returns the value
// that AArch64::getFMVPriority computes from them.
266+
uint64_t AArch64TTIImpl::getPriorityMask(const Function &F) const {
267+
SmallVector<StringRef, 8> Features;
268+
extractAttrFeatures(F, this, Features);
258269
return AArch64::getFMVPriority(Features);
259270
}
260271

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
9292
unsigned DefaultCallPenalty) const override;
9393

9494
uint64_t getFeatureMask(const Function &F) const override;
95+
uint64_t getPriorityMask(const Function &F) const override;
9596

9697
bool isMultiversionedFunction(const Function &F) const override;
9798

llvm/lib/TargetParser/AArch64TargetParser.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,17 +55,21 @@ std::optional<AArch64::FMVInfo> lookupFMVByID(AArch64::ArchExtKind ExtID) {
5555
return {};
5656
}
5757

/// Looks up the FMV info for \p Feature.
///
/// AArch64::parseFMVExtension is tried first. If it yields nothing and
/// \p Feature starts with '+', the string is mapped through
/// AArch64::targetFeatureToExtension and the resulting extension ID is looked
/// up with lookupFMVByID. The returned optional is empty when neither lookup
/// succeeds.
///
/// The helper is given internal linkage because it is an implementation detail
/// of this file. NOTE(review): assumes no other translation unit declares it.
58+
static std::optional<AArch64::FMVInfo> getFMVInfoFrom(StringRef Feature) {
59+
std::optional<AArch64::FMVInfo> FMV = AArch64::parseFMVExtension(Feature);
60+
if (!FMV && Feature.starts_with('+'))
61+
if (std::optional<AArch64::ExtensionInfo> Ext =
62+
AArch64::targetFeatureToExtension(Feature))
63+
FMV = lookupFMVByID(Ext->ID);
64+
return FMV;
65+
}
66+
5867
uint64_t AArch64::getFMVPriority(ArrayRef<StringRef> Features) {
5968
// Transitively enable the Arch Extensions which correspond to each feature.
6069
ExtensionSet FeatureBits;
6170
uint64_t PriorityMask = 0;
6271
for (const StringRef Feature : Features) {
63-
std::optional<FMVInfo> FMV = parseFMVExtension(Feature);
64-
if (!FMV && Feature.starts_with('+')) {
65-
if (std::optional<ExtensionInfo> Info = targetFeatureToExtension(Feature))
66-
FMV = lookupFMVByID(Info->ID);
67-
}
68-
if (FMV) {
72+
if (std::optional<FMVInfo> FMV = getFMVInfoFrom(Feature)) {
6973
// FMV feature without a corresponding Arch Extension may affect priority
7074
if (FMV->ID)
7175
FeatureBits.enable(*FMV->ID);
@@ -86,9 +90,9 @@ uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> Features) {
8690
// Transitively enable the Arch Extensions which correspond to each feature.
8791
ExtensionSet FeatureBits;
8892
for (const StringRef Feature : Features)
89-
if (std::optional<FMVInfo> Info = parseFMVExtension(Feature))
90-
if (Info->ID)
91-
FeatureBits.enable(*Info->ID);
93+
if (std::optional<FMVInfo> FMV = getFMVInfoFrom(Feature))
94+
if (FMV->ID)
95+
FeatureBits.enable(*FMV->ID);
9296

9397
// Construct a bitmask for all the transitively enabled Arch Extensions.
9498
uint64_t FeaturesMask = 0;

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2693,8 +2693,10 @@ static bool OptimizeNonTrivialIFuncs(
26932693
Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
26942694
bool Changed = false;
26952695

2696-
// Cache containing the mask constructed from a function's target features.
2696+
// Cache containing the feature mask constructed from a function's metadata.
26972697
DenseMap<Function *, uint64_t> FeatureMask;
2698+
// Cache containing the priority mask constructed from a function's metadata.
2699+
DenseMap<Function *, uint64_t> PriorityMask;
26982700

26992701
for (GlobalIFunc &IF : M.ifuncs()) {
27002702
if (IF.isInterposable())
@@ -2724,16 +2726,19 @@ static bool OptimizeNonTrivialIFuncs(
27242726
LLVM_DEBUG(dbgs() << "Statically resolving calls to function "
27252727
<< Resolver->getName() << "\n");
27262728

2727-
// Cache the feature mask for each callee.
2729+
// Cache the masks for each callee.
27282730
for (Function *Callee : Callees) {
2729-
auto [It, Inserted] = FeatureMask.try_emplace(Callee);
2730-
if (Inserted)
2731-
It->second = TTI.getFeatureMask(*Callee);
2731+
auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Callee);
2732+
if (FeatInserted)
2733+
FeatIt->second = TTI.getFeatureMask(*Callee);
2734+
auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Callee);
2735+
if (PriorInserted)
2736+
PriorIt->second = TTI.getPriorityMask(*Callee);
27322737
}
27332738

27342739
// Sort the callee versions in decreasing priority order.
27352740
sort(Callees, [&](auto *LHS, auto *RHS) {
2736-
return FeatureMask[LHS] > FeatureMask[RHS];
2741+
return PriorityMask[LHS] > PriorityMask[RHS];
27372742
});
27382743

27392744
// Find the callsites and cache the masks for each caller.
@@ -2746,6 +2751,9 @@ static bool OptimizeNonTrivialIFuncs(
27462751
auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller);
27472752
if (FeatInserted)
27482753
FeatIt->second = TTI.getFeatureMask(*Caller);
2754+
auto [PriorIt, PriorInserted] = PriorityMask.try_emplace(Caller);
2755+
if (PriorInserted)
2756+
PriorIt->second = TTI.getPriorityMask(*Caller);
27492757
auto [CallIt, CallInserted] = CallSites.try_emplace(Caller);
27502758
if (CallInserted)
27512759
Callers.push_back(Caller);
@@ -2756,7 +2764,7 @@ static bool OptimizeNonTrivialIFuncs(
27562764

27572765
// Sort the caller versions in decreasing priority order.
27582766
sort(Callers, [&](auto *LHS, auto *RHS) {
2759-
return FeatureMask[LHS] > FeatureMask[RHS];
2767+
return PriorityMask[LHS] > PriorityMask[RHS];
27602768
});
27612769

27622770
auto implies = [](uint64_t A, uint64_t B) { return (A & B) == B; };

0 commit comments

Comments
 (0)