-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VectorCombine] Preserve scoped alias metadata #153714
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
25cbc66
3c3d2b8
adce747
2148978
6574cf4
9f4a0a0
8dd4467
91f0c8d
c2a88cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -56,9 +56,9 @@ STATISTIC(NumScalarOps, "Number of scalar unary + binary ops formed"); | |||||||
| STATISTIC(NumScalarCmp, "Number of scalar compares formed"); | ||||||||
| STATISTIC(NumScalarIntrinsic, "Number of scalar intrinsic calls formed"); | ||||||||
|
|
||||||||
| static cl::opt<bool> DisableVectorCombine( | ||||||||
| "disable-vector-combine", cl::init(false), cl::Hidden, | ||||||||
| cl::desc("Disable all vector combine transforms")); | ||||||||
| static cl::opt<bool> | ||||||||
| DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, | ||||||||
| cl::desc("Disable all vector combine transforms")); | ||||||||
|
|
||||||||
| static cl::opt<bool> DisableBinopExtractShuffle( | ||||||||
| "disable-binop-extract-shuffle", cl::init(false), cl::Hidden, | ||||||||
|
|
@@ -1074,8 +1074,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) { | |||||||
| InstructionCost OldCost = 2 * SplatCost + VectorOpCost; | ||||||||
|
|
||||||||
| // Determine scalar opcode | ||||||||
| std::optional<unsigned> FunctionalOpcode = | ||||||||
| VPI.getFunctionalOpcode(); | ||||||||
| std::optional<unsigned> FunctionalOpcode = VPI.getFunctionalOpcode(); | ||||||||
| std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt; | ||||||||
| if (!FunctionalOpcode) { | ||||||||
| ScalarIntrID = VPI.getFunctionalIntrinsicID(); | ||||||||
|
|
@@ -1098,8 +1097,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) { | |||||||
| (SplatCost * !Op0->hasOneUse()) + (SplatCost * !Op1->hasOneUse()); | ||||||||
| InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats; | ||||||||
|
|
||||||||
| LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI | ||||||||
| << "\n"); | ||||||||
| LLVM_DEBUG(dbgs() << "Found a VP Intrinsic to scalarize: " << VPI << "\n"); | ||||||||
| LLVM_DEBUG(dbgs() << "Cost of Intrinsic: " << OldCost | ||||||||
| << ", Cost of scalarizing:" << NewCost << "\n"); | ||||||||
|
|
||||||||
|
|
@@ -1805,6 +1803,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { | |||||||
| // erased in the correct order. | ||||||||
| Worklist.push(LI); | ||||||||
|
|
||||||||
| Type *ElemType = VecTy->getElementType(); | ||||||||
|
|
||||||||
| // Replace extracts with narrow scalar loads. | ||||||||
| for (User *U : LI->users()) { | ||||||||
| auto *EI = cast<ExtractElementInst>(U); | ||||||||
|
|
@@ -1818,13 +1818,19 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) { | |||||||
| Builder.SetInsertPoint(EI); | ||||||||
| Value *GEP = | ||||||||
| Builder.CreateInBoundsGEP(VecTy, Ptr, {Builder.getInt32(0), Idx}); | ||||||||
| auto *NewLoad = cast<LoadInst>(Builder.CreateLoad( | ||||||||
| VecTy->getElementType(), GEP, EI->getName() + ".scalar")); | ||||||||
| auto *NewLoad = cast<LoadInst>( | ||||||||
| Builder.CreateLoad(ElemType, GEP, EI->getName() + ".scalar")); | ||||||||
|
|
||||||||
| Align ScalarOpAlignment = computeAlignmentAfterScalarization( | ||||||||
| LI->getAlign(), VecTy->getElementType(), Idx, *DL); | ||||||||
| Align ScalarOpAlignment = | ||||||||
| computeAlignmentAfterScalarization(LI->getAlign(), ElemType, Idx, *DL); | ||||||||
| NewLoad->setAlignment(ScalarOpAlignment); | ||||||||
|
|
||||||||
| if (auto *ConstIdx = dyn_cast<ConstantInt>(Idx)) { | ||||||||
| size_t Offset = ConstIdx->getZExtValue() * DL->getTypeStoreSize(ElemType); | ||||||||
| AAMDNodes OldAAMD = LI->getAAMetadata(); | ||||||||
| NewLoad->setAAMetadata(OldAAMD.adjustForAccess(Offset, ElemType, *DL)); | ||||||||
| } | ||||||||
|
Contributor
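There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If possible, it would be better to use `adjustForAccess()` here. [The inline code reference was lost in extraction; the name is inferred from the updated patch, which calls `OldAAMD.adjustForAccess(Offset, ElemType, *DL)`.]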
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi @nikic, thank you for the advice. I know it's a more thoughtful solution, but I'm not familiar with AA in LLVM. Can you guide me on how to use it? [The inline code reference that ended this sentence was lost in extraction; it presumably named `adjustForAccess()`.]
Contributor
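There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The type is [inline code lost in extraction]; see llvm-project/llvm/lib/Transforms/Vectorize/VectorCombine.cpp, lines 1652 to 1654 in adce747.
The problem arises when Idx is a variable: in that case the offset is unknown. So I would make the whole logic conditional on the index being a constant. [The end of this sentence was lost in extraction; the wording is inferred from the updated patch, which guards the metadata update with `dyn_cast<ConstantInt>(Idx)`.] |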
||||||||
|
|
||||||||
| replaceValue(*EI, *NewLoad); | ||||||||
| } | ||||||||
|
|
||||||||
|
|
@@ -2070,10 +2076,12 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) { | |||||||
| } | ||||||||
|
|
||||||||
| unsigned NumOpElts = Op0Ty->getNumElements(); | ||||||||
| bool IsIdentity0 = ShuffleDstTy == Op0Ty && | ||||||||
| bool IsIdentity0 = | ||||||||
| ShuffleDstTy == Op0Ty && | ||||||||
| all_of(NewMask0, [NumOpElts](int M) { return M < (int)NumOpElts; }) && | ||||||||
| ShuffleVectorInst::isIdentityMask(NewMask0, NumOpElts); | ||||||||
| bool IsIdentity1 = ShuffleDstTy == Op1Ty && | ||||||||
| bool IsIdentity1 = | ||||||||
| ShuffleDstTy == Op1Ty && | ||||||||
| all_of(NewMask1, [NumOpElts](int M) { return M < (int)NumOpElts; }) && | ||||||||
| ShuffleVectorInst::isIdentityMask(NewMask1, NumOpElts); | ||||||||
|
|
||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt < %s -passes=vector-combine -S | FileCheck %s --check-prefixes=CHECK | ||
|
|
||
| define <4 x i32> @quux(ptr addrspace(3) %arg) { | ||
| ; CHECK-LABEL: define <4 x i32> @quux( | ||
| ; CHECK-SAME: ptr addrspace(3) [[ARG:%.*]]) { | ||
| ; CHECK-NEXT: [[BB:.*:]] | ||
| ; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = load i8, ptr addrspace(3) [[ARG]], align 4, !tbaa [[TBAA0:![0-9]+]], !alias.scope [[META0:![0-9]+]], !noalias [[META0]] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(3) [[ARG]], i32 0, i64 1 | ||
| ; CHECK-NEXT: [[EXTRACTELEMENT1:%.*]] = load i8, ptr addrspace(3) [[TMP0]], align 1, !tbaa [[TBAA0]], !alias.scope [[META0]], !noalias [[META0]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(3) [[ARG]], i32 0, i64 2 | ||
| ; CHECK-NEXT: [[EXTRACTELEMENT2:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 2, !tbaa [[TBAA0]], !alias.scope [[META0]], !noalias [[META0]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x i8>, ptr addrspace(3) [[ARG]], i32 0, i64 3 | ||
| ; CHECK-NEXT: [[EXTRACTELEMENT3:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1, !tbaa [[TBAA0]], !alias.scope [[META0]], !noalias [[META0]] | ||
| ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[EXTRACTELEMENT]] to i32 | ||
| ; CHECK-NEXT: [[ZEXT4:%.*]] = zext i8 [[EXTRACTELEMENT1]] to i32 | ||
| ; CHECK-NEXT: [[ZEXT5:%.*]] = zext i8 [[EXTRACTELEMENT2]] to i32 | ||
| ; CHECK-NEXT: [[ZEXT6:%.*]] = zext i8 [[EXTRACTELEMENT3]] to i32 | ||
| ; CHECK-NEXT: [[INSERTELEMENT:%.*]] = insertelement <4 x i32> poison, i32 [[ZEXT]], i64 0 | ||
| ; CHECK-NEXT: [[INSERTELEMENT7:%.*]] = insertelement <4 x i32> [[INSERTELEMENT]], i32 [[ZEXT4]], i64 1 | ||
| ; CHECK-NEXT: [[INSERTELEMENT8:%.*]] = insertelement <4 x i32> [[INSERTELEMENT7]], i32 [[ZEXT5]], i64 2 | ||
| ; CHECK-NEXT: [[INSERTELEMENT9:%.*]] = insertelement <4 x i32> [[INSERTELEMENT8]], i32 [[ZEXT6]], i64 3 | ||
| ; CHECK-NEXT: ret <4 x i32> [[INSERTELEMENT9]] | ||
| ; | ||
| bb: | ||
| %load = load <4 x i8>, ptr addrspace(3) %arg, align 4, !alias.scope !0, !noalias !0, !tbaa !5 | ||
| %extractelement = extractelement <4 x i8> %load, i64 0 | ||
| %extractelement1 = extractelement <4 x i8> %load, i64 1 | ||
| %extractelement2 = extractelement <4 x i8> %load, i64 2 | ||
| %extractelement3 = extractelement <4 x i8> %load, i64 3 | ||
| %zext = zext i8 %extractelement to i32 | ||
| %zext4 = zext i8 %extractelement1 to i32 | ||
| %zext5 = zext i8 %extractelement2 to i32 | ||
| %zext6 = zext i8 %extractelement3 to i32 | ||
| %insertelement = insertelement <4 x i32> poison, i32 %zext, i64 0 | ||
| %insertelement7 = insertelement <4 x i32> %insertelement, i32 %zext4, i64 1 | ||
| %insertelement8 = insertelement <4 x i32> %insertelement7, i32 %zext5, i64 2 | ||
| %insertelement9 = insertelement <4 x i32> %insertelement8, i32 %zext6, i64 3 | ||
| ret <4 x i32> %insertelement9 | ||
| } | ||
|
|
||
| !0 = !{!1} | ||
| !1 = distinct !{!1, !2} | ||
| !2 = distinct !{!2} | ||
| !3 = !{!"Simple C/C++ TBAA"} | ||
| !4 = !{!"omnipotent char", !3, i64 0} | ||
| !5 = !{!"i8", !4, i64 0} | ||
| ;. | ||
| ; CHECK: [[META0]] = !{[[META1:![0-9]+]]} | ||
| ; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} | ||
| ; CHECK: [[META2]] = distinct !{[[META2]]} | ||
| ;. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please do not reformat unrelated code (you can use git-clang-format to only format changed lines).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, sorry. Fixed.