diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index d9d9b36d0b739..7c8a41b05f805 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -305,6 +305,33 @@ bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
   Type *OrigGEPType = GEPI.getSourceElementType();
   Type *NewGEPType = OrigGEPType;
   bool NeedsTransform = false;
+  // Check if the pointer operand is a ConstantExpr GEP
+  if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
+      PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
+    if (GlobalVariable *NewGlobal =
+            lookupReplacementGlobal(PtrOpGEPCE->getOperand(0))) {
+      GetElementPtrInst *NestedGEP =
+          cast<GetElementPtrInst>(PtrOpGEPCE->getAsInstruction());
+      NestedGEP->insertBefore(GEPI.getIterator());
+
+      // Create a new GEP with the replaced global directly
+      IRBuilder<> Builder(&GEPI);
+      Type *NewNestedGEPType = NewGlobal->getValueType();
+
+      // Extract indices from the ConstantExpr GEP
+      SmallVector<Value *> NestedIndices(NestedGEP->indices());
+      Value *NewNestedGEP =
+          Builder.CreateGEP(NewNestedGEPType, NewGlobal, NestedIndices,
+                            NestedGEP->getName(), NestedGEP->getNoWrapFlags());
+
+      // Update the outer GEP to use the new nested GEP
+      GEPI.setOperand(GEPI.getPointerOperandIndex(), NewNestedGEP);
+      NestedGEP->replaceAllUsesWith(NewNestedGEP);
+      NestedGEP->eraseFromParent();
+      // Return true to indicate that we've modified the instruction
+      return true;
+    }
+  }
 
   if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
     NewGEPType = NewGlobal->getValueType();
diff --git a/llvm/test/CodeGen/DirectX/bugfix_139023_data_scalarize_const_gep.ll b/llvm/test/CodeGen/DirectX/bugfix_139023_data_scalarize_const_gep.ll
new file mode 100644
index 0000000000000..2decb3d6e19ee
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/bugfix_139023_data_scalarize_const_gep.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
+; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.4-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
+
+@aTile = hidden addrspace(3) global [10 x [10 x <4 x i32>]] zeroinitializer, align 16
+@bTile = hidden addrspace(3) global [10 x [10 x i32]] zeroinitializer, align 16
+
+define void @CSMain() {
+; CHECK-LABEL: define void @CSMain() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
+; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
+; FCHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([400 x i32], ptr addrspace(3) @aTile.scalarized.1dim, i32 0, i32 48), align 16
+; SCHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
+; SCHECK-NEXT: ret void
+;
+entry:
+  %aFragPacked.i = alloca <4 x i32>, align 16
+  %0 = load <4 x i32>, ptr addrspace(3) getelementptr inbounds ([10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x <4 x i32>]], ptr addrspace(3) @aTile, i32 0, i32 1), i32 0, i32 2), align 16
+  store <4 x i32> %0, ptr %aFragPacked.i, align 16
+  ret void
+}
+
+define void @Main() {
+; CHECK-LABEL: define void @Main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
+; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 2
+; FCHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 12), align 16
+; SCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
+; SCHECK-NEXT: store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
+; SCHECK-NEXT: ret void
+;
+entry:
+  %bFragPacked.i = alloca i32, align 16
+  %0 = load i32, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 2), align 16
+  store i32 %0, ptr %bFragPacked.i, align 16
+  ret void
+}