-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[DirectX] Reland #142853 with Circular GEP fixes #143747
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
partially fixes llvm#142836 - Update DXILFlattenArrays.cpp GEPs to use two indicies since they are array GEPs - Update flatten test cases - This change reduces dxv bitcast validation errors by 364 (Total now is 1070x) - This change reduces dxv out of bounds validation errors by 124 (Total is now 24) - We are also able to successfully compile 4 more shaders
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) ChangesThis change relands #142853 Patch is 20.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143747.diff 5 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
index a3163a8969642..10883527ffafd 100644
--- a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -42,7 +42,7 @@ class DXILFlattenArraysLegacy : public ModulePass {
struct GEPData {
ArrayType *ParentArrayType;
- Value *ParendOperand;
+ Value *ParentOperand;
SmallVector<Value *> Indices;
SmallVector<uint64_t> Dims;
bool AllIndicesAreConstInt;
@@ -211,7 +211,7 @@ bool DXILFlattenArraysVisitor::visitAllocaInst(AllocaInst &AI) {
ArrayType *FattenedArrayType = ArrayType::get(BaseType, TotalElements);
AllocaInst *FlatAlloca =
- Builder.CreateAlloca(FattenedArrayType, nullptr, AI.getName() + ".flat");
+ Builder.CreateAlloca(FattenedArrayType, nullptr, AI.getName() + ".1dim");
FlatAlloca->setAlignment(AI.getAlign());
AI.replaceAllUsesWith(FlatAlloca);
AI.eraseFromParent();
@@ -222,6 +222,10 @@ void DXILFlattenArraysVisitor::recursivelyCollectGEPs(
GetElementPtrInst &CurrGEP, ArrayType *FlattenedArrayType,
Value *PtrOperand, unsigned &GEPChainUseCount, SmallVector<Value *> Indices,
SmallVector<uint64_t> Dims, bool AllIndicesAreConstInt) {
+ // Check if this GEP is already in the map to avoid circular references
+ if (GEPChainMap.count(&CurrGEP) > 0)
+ return;
+
Value *LastIndex = CurrGEP.getOperand(CurrGEP.getNumOperands() - 1);
AllIndicesAreConstInt &= isa<ConstantInt>(LastIndex);
Indices.push_back(LastIndex);
@@ -271,9 +275,18 @@ bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChainBase(
genInstructionFlattenIndices(GEPInfo.Indices, GEPInfo.Dims, Builder);
ArrayType *FlattenedArrayType = GEPInfo.ParentArrayType;
+
+ // Don't append '.flat' to an empty string. If the SSA name isn't available
+ // it could conflict with the ParentOperand's name.
+ std::string FlatName = GEP.hasName() ? GEP.getName().str() + ".flat" : "";
+
Value *FlatGEP =
- Builder.CreateGEP(FlattenedArrayType, GEPInfo.ParendOperand, FlatIndex,
- GEP.getName() + ".flat", GEP.isInBounds());
+ Builder.CreateGEP(FlattenedArrayType, GEPInfo.ParentOperand,
+ {Builder.getInt32(0), FlatIndex},
+ FlatName, GEP.getNoWrapFlags());
+
+ // Store the new GEP in the map before replacing uses to avoid circular references
+ GEPChainMap.erase(&GEP);
GEP.replaceAllUsesWith(FlatGEP);
GEP.eraseFromParent();
diff --git a/llvm/test/CodeGen/DirectX/flatten-array.ll b/llvm/test/CodeGen/DirectX/flatten-array.ll
index 754d5a25ca905..5c761014d471f 100644
--- a/llvm/test/CodeGen/DirectX/flatten-array.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-array.ll
@@ -31,7 +31,7 @@ define void @alloca_4d_test () {
; CHECK-LABEL: gep_2d_test
define void @gep_2d_test () {
; CHECK: [[a:%.*]] = alloca [9 x i32], align 4
- ; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 {{[0-8]}}
+ ; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 0, i32 {{[0-8]}}
; CHECK-NEXT: ret void
%1 = alloca [3 x [3 x i32]], align 4
%g2d0 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 0
@@ -53,7 +53,7 @@ define void @gep_2d_test () {
; CHECK-LABEL: gep_3d_test
define void @gep_3d_test () {
; CHECK: [[a:%.*]] = alloca [8 x i32], align 4
- ; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 {{[0-7]}}
+ ; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 0, i32 {{[0-7]}}
; CHECK-NEXT: ret void
%1 = alloca [2 x[2 x [2 x i32]]], align 4
%g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 0
@@ -76,7 +76,7 @@ define void @gep_3d_test () {
; CHECK-LABEL: gep_4d_test
define void @gep_4d_test () {
; CHECK: [[a:%.*]] = alloca [16 x i32], align 4
- ; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 {{[0-9]|1[0-5]}}
+ ; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 0, i32 {{[0-9]|1[0-5]}}
; CHECK-NEXT: ret void
%1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4
%g4d0 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 0
@@ -123,8 +123,8 @@ define void @gep_4d_test () {
@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
define void @global_gep_load() {
- ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 6
- ; CHECK: load i32, ptr [[GEP_PTR]], align 4
+ ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 6
+ ; CHECK-NEXT: load i32, ptr [[GEP_PTR]], align 4
; CHECK-NEXT: ret void
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 0
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
@@ -142,7 +142,7 @@ define void @global_gep_load_index(i32 %row, i32 %col, i32 %timeIndex) {
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[ROW]], 12
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP6]]
+; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 [[TMP6]]
; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
@@ -163,7 +163,7 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[ROW]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP4]]
+; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 0, i32 [[TMP4]]
; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
@@ -177,8 +177,8 @@ define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
}
define void @global_gep_store() {
- ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 13
- ; CHECK: store i32 1, ptr [[GEP_PTR]], align 4
+ ; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 0, i32 13
+ ; CHECK-NEXT: store i32 1, ptr [[GEP_PTR]], align 4
; CHECK-NEXT: ret void
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @b, i32 0, i32 1
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 0
diff --git a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
index 3ae5832ce8322..c73e5017348d1 100644
--- a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
+++ b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
@@ -8,9 +8,9 @@
define internal void @main() {
; CHECK-LABEL: define internal void @main() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 0, i32 1
; CHECK-NEXT: [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 0, i32 2
; CHECK-NEXT: [[DOTI03:%.*]] = load float, ptr [[TMP1]], align 16
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 7e5a92e1311f8..c960aad3d2627 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -32,23 +32,23 @@ define <4 x i32> @load_array_vec_test() #0 {
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(3) [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), i32 1) to ptr addrspace(3)
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
-; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
-; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
-; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
-; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
-; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
-; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
-; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
-; CHECK-NEXT: ret <4 x i32> [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), i32 2) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) [[TMP13]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), i32 3) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) [[TMP15]], align 4
+; CHECK-NEXT: [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
+; CHECK-NEXT: [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
+; CHECK-NEXT: [[DOTI38:%.*]] = add i32 [[TMP8]], [[TMP16]]
+; CHECK-NEXT: [[DOTUPTO01215:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI05]], i32 0
+; CHECK-NEXT: [[DOTUPTO11316:%.*]] = insertelement <4 x i32> [[DOTUPTO01215]], i32 [[DOTI16]], i32 1
+; CHECK-NEXT: [[DOTUPTO21417:%.*]] = insertelement <4 x i32> [[DOTUPTO11316]], i32 [[DOTI27]], i32 2
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[DOTUPTO21417]], i32 [[DOTI38]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP17]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
@@ -81,23 +81,19 @@ define <4 x i32> @load_vec_test() #0 {
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[TMP3]], i32 1
+; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 0, i32 [[INDEX]]
+; CHECK-NEXT: [[DOTI0:%.*]] = load i32, ptr [[DOTFLAT]], align 4
+; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 1
; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[TMP4]], i32 2
+; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 2
; CHECK-NEXT: [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
-; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[TMP5]], i32 3
+; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[DOTFLAT]], i32 3
; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
-; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i32 1
-; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i32 3
-; CHECK-NEXT: ret <4 x i32> [[TMP6]]
+; CHECK-NEXT: [[DOTUPTO01:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI0]], i32 0
+; CHECK-NEXT: [[DOTUPTO12:%.*]] = insertelement <4 x i32> [[DOTUPTO01]], i32 [[DOTI1]], i32 1
+; CHECK-NEXT: [[DOTUPTO23:%.*]] = insertelement <4 x i32> [[DOTUPTO12]], i32 [[DOTI2]], i32 2
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> [[DOTUPTO23]], i32 [[DOTI3]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
%4 = load <4 x i32>, <4 x i32>* %3, align 4
@@ -115,23 +111,23 @@ define <4 x i32> @multid_load_test() #0 {
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(3) [[TMP9]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 1) to ptr addrspace(3)
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
-; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
-; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
-; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
-; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
-; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i32 0
-; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i32 1
-; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i32 2
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i32 3
-; CHECK-NEXT: ret <4 x i32> [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 2) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(3) [[TMP13]], align 4
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), i32 3) to ptr addrspace(3)
+; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(3) [[TMP15]], align 4
+; CHECK-NEXT: [[DOTI05:%.*]] = add i32 [[TMP2]], [[TMP10]]
+; CHECK-NEXT: [[DOTI16:%.*]] = add i32 [[TMP4]], [[TMP12]]
+; CHECK-NEXT: [[DOTI27:%.*]] = add i32 [[TMP6]], [[TMP14]]
+; CHECK-NEXT: [[DOTI38:%.*]] = add i32 [[TMP8]], [[TMP16]]
+; CHECK-NEXT: [[DOTUPTO01215:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI05]], i32 0
+; CHECK-NEXT: [[DOTUPTO11316:%.*]] = insertelement <4 x i32> [[DOTUPTO01215]], i32 [[DOTI16]], i32 1
+; CHECK-NEXT: [[DOTUPTO21417:%.*]] = insertelement <4 x i32> [[DOTUPTO11316]], i32 [[DOTI27]], i32 2
+; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[DOTUPTO21417]], i32 [[DOTI38]], i32 3
+; CHECK-NEXT: ret <4 x i32> [[TMP17]]
;
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
diff --git a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
index 2676abec1d8ae..a07ce2c24f7ac 100644
--- a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
@@ -8,13 +8,13 @@
define internal void @main() #1 {
; CHECK-LABEL: define internal void @main() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [24 x float], ptr @StaticArr.scalarized.1dim, i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [24 x float], ptr @StaticArr.scalarized.1dim, i32 0, i32 1
; CHECK-NEXT: [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16
; CHECK-NEXT: [[DOTI1:%.*]] = getelementptr float, ptr [[TMP0]], i32 1
; CHECK-NEXT: [[DOTI11:%.*]] = load float, ptr [[DOTI1]], align 4
; CHECK-NEXT: [[DOTI2:%.*]] = getelementptr float, ptr [[TMP0]], i32 2
; CHECK-NEXT: [[DOTI22:%.*]] = load float, ptr [[DOTI2]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = geteleme...
[truncated]
|
|
||
// Don't append '.flat' to an empty string. If the SSA name isn't available | ||
// it could conflict with the ParentOperand's name. | ||
std::string FlatName = GEP.hasName() ? GEP.getName().str() + ".flat" : ""; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
did .str()
in this case because clangd warned that getName()
alone could cause a potential use after free.
✅ With the latest revision this PR passed the C/C++ code formatter. |
81295ee
to
3ebf17e
Compare
|
||
// Store the new GEP in the map before replacing uses to avoid circular | ||
// references | ||
GEPChainMap.erase(&GEP); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think I understand the comment here with the code correctly.
Is this intended to also insert FlatGEP
to GEPChainMap
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch. Sorry vestigial comment. Should now say something like erase the old Gep before replacing uses.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM once comment is cleaned up.
This change relands llvm#142853 It fixes the circular reference issue we were seeing in GEPs ex `%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32 15`
This change relands llvm#142853 It fixes the circular reference issue we were seeing in GEPs ex `%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32 15`
This change relands #142853
It fixes the circular reference issue we were seeing in GEPs
ex
%.flat = getelementptr inbounds [16 x i32], ptr %.flat, i32 0, i32 15