-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[sroa][profcheck] Vector selects have "unknown" branch weights #163319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[sroa][profcheck] Vector selects have "unknown" branch weights #163319
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Scalar/SROA.cpp llvm/test/Transforms/SROA/slice-width.llThe following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}Please refer to the Undefined Behavior Manual for more information. |
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/163319.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index d2f09e9f3c639..578fec772603f 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2667,7 +2667,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
- V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
+ // No profiling support for vector selects.
+ V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
+ DEBUG_TYPE, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index eabb6978c9125..3b77e49e78358 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
; This tests that allocas are not split into slices that are not byte width multiple
+;.
+; CHECK: @foo_copy_source = external constant %union.Foo
+; CHECK: @i64_sink = global i64 0
+;.
define void @no_split_on_non_byte_width(i32) {
; CHECK-LABEL: @no_split_on_non_byte_width(
; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8
@@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr)
; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
; vector store, hence accidentally putting gibberish onto the stack.
-define i32 @memcpy_vec3float_widening(ptr %x) {
+define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 {
; CHECK-LABEL: @memcpy_vec3float_widening(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
+; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
@@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() {
%L2 = load i1, ptr %A
ret i1 %L2
}
+!0 = !{!"function_entry_count", i32 10}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-MODIFY-CFG: {{.*}}
; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 3c8eb0b163ffe..d30673cbb4220 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -1312,14 +1312,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SROA/phi-gep.ll
-Transforms/SROA/scalable-vectors-with-known-vscale.ll
-Transforms/SROA/select-gep.ll
-Transforms/SROA/select-load.ll
-Transforms/SROA/slice-width.ll
-Transforms/SROA/vector-conversion.ll
-Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll
-Transforms/SROA/vector-promotion.ll
Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
Transforms/StructurizeCFG/hoist-zerocost.ll
|
7727884 to
7860f9b
Compare
79d83ea to
35e3c09
Compare
|
@nikic what's the guidance here - the |
boomanaiden154
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what's the guidance here - the undef is pre-existing. (Happy to patch, too, in a separate PR, but looking for guidance first)
Nuno added that check a while ago and it's not super high fidelity. Might be good to file an issue about the check...
It should be fine here because it's undef produced by an optimization specifically because the test is loading from uninitialized memory rather than someone using undef within a test as a dummy value. Not sure whether or not it's better to patch to remove the uninitialized memory load.
35e3c09 to
6838129
Compare
6838129 to
bd4bc1e
Compare
|
(For tracking) The issue @boomanaiden154 is talking about is Issue #163478 |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/24008 Here is the relevant piece of the build log for the reference |

Issue #147390