Skip to content

Commit a848c1b

Browse files
authored
[sroa][profcheck] Vector selects have "unknown" branch weights (#163319)
1 parent 1127dd7 commit a848c1b

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2667,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2667 2667
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2668 2668
Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2669 2669

2670-
V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
2670+
// No profiling support for vector selects.
2671+
V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
2672+
DEBUG_TYPE, Name + "blend");
2671 2673

2672 2674
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2673 2675
return V;

llvm/test/Transforms/SROA/slice-width.ll

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
2 2
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
3 3
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
4 4
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
8 8
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
9 9

10 10
; This tests that allocas are not split into slices that are not byte width multiple
11+
;.
12+
; CHECK: @foo_copy_source = external constant %union.Foo
13+
; CHECK: @i64_sink = global i64 0
14+
;.
11 15
define void @no_split_on_non_byte_width(i32) {
12 16
; CHECK-LABEL: @no_split_on_non_byte_width(
13 17
; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8
@@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr)
92 96

93 97
; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
94 98
; vector store, hence accidentally putting gibberish onto the stack.
95-
define i32 @memcpy_vec3float_widening(ptr %x) {
99+
define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 {
96 100
; CHECK-LABEL: @memcpy_vec3float_widening(
97 101
; CHECK-NEXT: entry:
98 102
; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
99 103
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
100-
; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
104+
; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]]
101 105
; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
102 106
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
103 107
; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
@@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() {
158 162
%L2 = load i1, ptr %A
159 163
ret i1 %L2
160 164
}
165+
!0 = !{!"function_entry_count", i32 10}
166+
167+
;.
168+
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
169+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
170+
;.
171+
; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
172+
; CHECK: [[PROF1]] = !{!"unknown", !"sroa"}
173+
;.
161 174
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
162 175
; CHECK-MODIFY-CFG: {{.*}}
163 176
; CHECK-PRESERVE-CFG: {{.*}}

llvm/utils/profcheck-xfail.txt

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,14 +1310,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
1310 1310
Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
1311 1311
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
1312 1312
Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
1313-
Transforms/SROA/phi-gep.ll
1314-
Transforms/SROA/scalable-vectors-with-known-vscale.ll
1315-
Transforms/SROA/select-gep.ll
1316-
Transforms/SROA/select-load.ll
1317-
Transforms/SROA/slice-width.ll
1318-
Transforms/SROA/vector-conversion.ll
1319-
Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
1320-
Transforms/SROA/vector-promotion.ll
1321 1313
Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
1322 1314
Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
1323 1315
Transforms/StructurizeCFG/hoist-zerocost.ll

0 commit comments

Comments
 (0)