Skip to content

Commit b2338d4

Browse files
committed
Merging r344824:
------------------------------------------------------------------------ r344824 | ctopper | 2018-10-19 18:30:00 -0700 (Fri, 19 Oct 2018) | 14 lines [X86] When checking the bits in cpu_features for function multiversioning dispatcher in the resolver, make sure all the required bits are set, not just one of them. Summary: The multiversioning code repurposed the code from __builtin_cpu_supports for checking if a single feature is enabled. That code essentially performed (_cpu_features & (1 << C)) != 0. But with the multiversioning path, the mask is no longer guaranteed to be a power of 2. So we returned true any time any one of the bits in the mask was set, rather than only when all of the bits were set. The correct check is (_cpu_features & mask) == mask. Reviewers: erichkeane, echristo Reviewed By: echristo Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D53460 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_70@344923 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0513b40 commit b2338d4

File tree

3 files changed

+20
-5
lines changed

3 files changed

+20
-5
lines changed

lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8952,9 +8952,9 @@ llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint32_t FeaturesMask) {
89528952
Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
89538953

89548954
// Check the value of the bit corresponding to the feature requested.
8955-
Value *Bitset = Builder.CreateAnd(
8956-
Features, llvm::ConstantInt::get(Int32Ty, FeaturesMask));
8957-
return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
8955+
Value *Mask = Builder.getInt32(FeaturesMask);
8956+
Value *Bitset = Builder.CreateAnd(Features, Mask);
8957+
return Builder.CreateICmpEQ(Bitset, Mask);
89588958
}
89598959

89608960
Value *CodeGenFunction::EmitX86CpuInit() {

test/CodeGen/attr-target-mv.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ void bar4() {
7070
// CHECK: ret void ()* @foo_decls.sse4.2
7171
// CHECK: ret void ()* @foo_decls
7272

73+
// CHECK: define void @bar4()
74+
// CHECK: call void @foo_multi.ifunc()
75+
76+
// CHECK: define void ()* @foo_multi.resolver() comdat
77+
// CHECK: and i32 %{{.*}}, 4352
78+
// CHECK: icmp eq i32 %{{.*}}, 4352
79+
// CHECK: ret void ()* @foo_multi.fma4_sse4.2
80+
// CHECK: icmp eq i32 %{{.*}}, 12
81+
// CHECK: and i32 %{{.*}}, 4352
82+
// CHECK: icmp eq i32 %{{.*}}, 4352
83+
// CHECK: ret void ()* @foo_multi.arch_ivybridge_fma4_sse4.2
84+
// CHECK: and i32 %{{.*}}, 768
85+
// CHECK: icmp eq i32 %{{.*}}, 768
86+
// CHECK: ret void ()* @foo_multi.avx_sse4.2
87+
// CHECK: ret void ()* @foo_multi
88+
7389
// CHECK: declare i32 @foo.arch_sandybridge()
7490

7591
// CHECK: define available_externally i32 @foo_inline.sse4.2()
@@ -88,4 +104,3 @@ void bar4() {
88104
// CHECK: define available_externally void @foo_multi.avx_sse4.2()
89105
// CHECK: define available_externally void @foo_multi.fma4_sse4.2()
90106
// CHECK: define available_externally void @foo_multi.arch_ivybridge_fma4_sse4.2()
91-

test/CodeGen/builtin-cpu-supports.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ int main() {
1414

1515
// CHECK: [[LOAD:%[^ ]+]] = load i32, i32* getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, { i32, i32, i32, [1 x i32] }* @__cpu_model, i32 0, i32 3, i32 0)
1616
// CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256
17-
// CHECK: = icmp ne i32 [[AND]], 0
17+
// CHECK: = icmp eq i32 [[AND]], 256
1818

1919
return 0;
2020
}

0 commit comments

Comments
 (0)