diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 3ad87545953ff..352a1b331001a 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -617,6 +617,41 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
   return Postorder;
 }
 
+// Inserts an addrspacecast for a phi node operand, choosing the proper
+// insertion position based on the operand type.
+static Value *phiNodeOperandWithNewAddressSpace(AddrSpaceCastInst *NewI,
+                                                Value *Operand) {
+  auto InsertBefore = [NewI](auto It) {
+    NewI->insertBefore(It);
+    NewI->setDebugLoc(It->getDebugLoc());
+    return NewI;
+  };
+
+  if (auto *Arg = dyn_cast<Argument>(Operand)) {
+    // For arguments, insert the cast at the beginning of the entry block.
+    // Consider inserting in the dominating block for better placement.
+    Function *F = Arg->getParent();
+    auto InsertI = F->getEntryBlock().getFirstNonPHIIt();
+    return InsertBefore(InsertI);
+  }
+
+  // No check for Constant here, as constants are already handled.
+  assert(isa<Instruction>(Operand));
+
+  Instruction *OpInst = cast<Instruction>(Operand);
+  if (LLVM_UNLIKELY(OpInst->getOpcode() == Instruction::PHI)) {
+    // If the operand is defined by another PHI node, insert after the first
+    // non-PHI instruction in the corresponding basic block.
+    auto InsertI = OpInst->getParent()->getFirstNonPHIIt();
+    return InsertBefore(InsertI);
+  }
+
+  // Otherwise, insert immediately after the operand definition.
+  NewI->insertAfter(OpInst->getIterator());
+  NewI->setDebugLoc(OpInst->getDebugLoc());
+  return NewI;
+}
+
 // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
 // of OperandUse.get() in the new address space. If the clone is not ready yet,
 // returns poison in the new address space as a placeholder.
@@ -642,6 +677,10 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
   unsigned NewAS = I->second;
   Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
   auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
+
+  if (LLVM_UNLIKELY(Inst->getOpcode() == Instruction::PHI))
+    return phiNodeOperandWithNewAddressSpace(NewI, Operand);
+
   NewI->insertBefore(Inst->getIterator());
   NewI->setDebugLoc(Inst->getDebugLoc());
   return NewI;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
new file mode 100644
index 0000000000000..319c26a24b271
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -passes='require<assumptions>,infer-address-spaces' %s | FileCheck %s
+
+define void @test(ptr %lhs_ptr, ptr %rhs_ptr) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[LHS_PTR:%.*]], ptr [[RHS_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[PTR_1:%.*]] = load ptr, ptr [[LHS_PTR]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
+; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_1]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT: [[PTR_2:%.*]] = load ptr, ptr [[RHS_PTR]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR_2]] to ptr addrspace(3)
+; CHECK-NEXT: [[BOOL_2:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_2]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_2]])
+; CHECK-NEXT: br i1 poison, label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT: br label %[[IF_SINK_SPLIT:.*]]
+; CHECK: [[IF_ELSE]]:
+; CHECK-NEXT: [[V2:%.*]] = load i32, ptr null, align 4
+; CHECK-NEXT: br label %[[IF_SINK_SPLIT]]
+; CHECK: [[IF_SINK_SPLIT]]:
+; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP1]], %[[IF_ELSE]] ]
+; CHECK-NEXT: [[V_SINK:%.*]] = phi i32 [ [[V1]], %[[IF_THEN]] ], [ [[V2]], %[[IF_ELSE]] ]
+; CHECK-NEXT: store i32 [[V_SINK]], ptr addrspace(3) [[PTR_SINK]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %ptr.1 = load ptr, ptr %lhs_ptr, align 8
+  %bool.1 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.1)
+  tail call void @llvm.assume(i1 %bool.1)
+
+  %ptr.2 = load ptr, ptr %rhs_ptr, align 8
+  %bool.2 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.2)
+  tail call void @llvm.assume(i1 %bool.2)
+  br i1 poison, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %v1 = load i32, ptr null, align 4
+  br label %if.sink.split
+
+if.else:                                          ; preds = %entry
+  %v2 = load i32, ptr null, align 4
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %if.else, %if.then
+  %ptr.sink = phi ptr [ %ptr.1, %if.then ], [ %ptr.2, %if.else ]
+  %v.sink = phi i32 [ %v1, %if.then ], [ %v2, %if.else ]
+  store i32 %v.sink, ptr %ptr.sink, align 4
+  ret void
+}
+
+declare void @llvm.assume(i1 noundef)
+declare i1 @llvm.amdgcn.is.shared(ptr)
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
new file mode 100644
index 0000000000000..5a3a8b9117b35
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes='require<assumptions>,infer-address-spaces' %s | FileCheck %s
+
+;;; Handle a corner case in infer-address-spaces with PHI nodes: the verifier
+;;; will crash if we insert an `addrspacecast` before a PHI node.
+
+declare void @llvm.assume(i1 noundef)
+declare i1 @llvm.nvvm.isspacep.shared(ptr)
+declare i1 @llvm.nvvm.isspacep.global(ptr)
+
+define void @phinode_instr() {
+; CHECK-LABEL: @phinode_instr(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[PTR_1:%.*]] = load ptr, ptr null, align 8
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
+; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[PTR_1]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT: br label [[IF_SINK_SPLIT:%.*]]
+; CHECK: if.sink.split:
+; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: store i32 1, ptr addrspace(3) [[PTR_SINK]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %ptr.1 = load ptr, ptr null, align 8
+  %bool.1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %ptr.1)
+  tail call void @llvm.assume(i1 %bool.1)
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %entry
+  %ptr.sink = phi ptr [ %ptr.1, %entry ]
+  store i32 1, ptr %ptr.sink, align 4
+  ret void
+}
+
+define void @phinode_argument(ptr %lhs_ptr) {
+; CHECK-LABEL: @phinode_argument(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[LHS_PTR:%.*]] to ptr addrspace(1)
+; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[LHS_PTR]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]])
+; CHECK-NEXT: br label [[IF_SINK_SPLIT:%.*]]
+; CHECK: if.sink.split:
+; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(1) [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: store i32 1, ptr addrspace(1) [[PTR_SINK]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+  %bool.1 = tail call i1 @llvm.nvvm.isspacep.global(ptr %lhs_ptr)
+  tail call void @llvm.assume(i1 %bool.1)
+  br label %if.sink.split
+
+if.sink.split:                                    ; preds = %entry
+  %ptr.sink = phi ptr [ %lhs_ptr, %entry ]
+  store i32 1, ptr %ptr.sink, align 4
+  ret void
+}
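
A note for reviewers, separate from the diff above: the sketch below is not part of the patch. It is a minimal standalone C++ program, built against the LLVM C++ API, that rebuilds the shape of the NVPTX @phinode_instr test and contrasts the old cast placement (directly before the user, here a PHI) with the patched placement (right after the operand's definition). The helper name buildPhiSink, the value names, and the build command are illustrative assumptions; the only LLVM-specific claim is that a non-PHI instruction placed before a PHI fails verification, which is exactly what phiNodeOperandWithNewAddressSpace avoids.

// Standalone sketch, NOT part of the patch: rebuilds the shape of the NVPTX
// @phinode_instr test with the IR API and contrasts the two cast placements.
// Compile against LLVM headers/libs, e.g.
//   clang++ demo.cpp $(llvm-config --cxxflags --ldflags --libs core support)
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Builds: entry loads a flat pointer and branches to a sink block whose PHI
// merges that pointer and feeds a store (the pattern the new tests exercise).
static Function *buildPhiSink(Module &M) {
  LLVMContext &Ctx = M.getContext();
  auto *FnTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F =
      Function::Create(FnTy, Function::ExternalLinkage, "phinode_instr", M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  BasicBlock *Sink = BasicBlock::Create(Ctx, "if.sink.split", F);

  IRBuilder<> B(Entry);
  PointerType *FlatPtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  Value *Ptr =
      B.CreateLoad(FlatPtrTy, ConstantPointerNull::get(FlatPtrTy), "ptr.1");
  B.CreateBr(Sink);

  B.SetInsertPoint(Sink);
  PHINode *Phi = B.CreatePHI(FlatPtrTy, /*NumReservedValues=*/1, "ptr.sink");
  Phi->addIncoming(Ptr, Entry);
  B.CreateStore(B.getInt32(1), Phi);
  B.CreateRetVoid();
  return F;
}

int main() {
  LLVMContext Ctx;
  Module M("phi-cast-placement", Ctx);
  Function *F = buildPhiSink(M);

  PHINode *Phi = cast<PHINode>(&F->back().front());
  Value *Operand = Phi->getIncomingValue(0);
  PointerType *SharedPtrTy = PointerType::get(Ctx, /*AddressSpace=*/3);

  // Old behaviour: the cast is inserted right before the user. When the user
  // is a PHI, a non-PHI instruction now precedes the PHIs of the block and
  // the verifier complains ("PHI nodes not grouped at top of basic block!").
  auto *BadCast = new AddrSpaceCastInst(Operand, SharedPtrTy, "bad.cast");
  BadCast->insertBefore(Phi->getIterator());
  errs() << "before-PHI placement broken: " << verifyFunction(*F, &errs())
         << "\n";
  BadCast->eraseFromParent();

  // Patched behaviour: insert immediately after the operand's definition in
  // the predecessor block, as phiNodeOperandWithNewAddressSpace does for
  // plain instruction operands. The function now verifies cleanly.
  auto *GoodCast = new AddrSpaceCastInst(Operand, SharedPtrTy, "good.cast");
  GoodCast->insertAfter(cast<Instruction>(Operand)->getIterator());
  errs() << "after-def placement broken: " << verifyFunction(*F, &errs())
         << "\n";
  return 0;
}

Inserting after the operand's definition keeps the cast dominated by its input while still dominating the PHI's use along the corresponding edge, which is why the patch only needs two special cases in phiNodeOperandWithNewAddressSpace: arguments (no defining instruction, so use the entry block's first insertion point) and operands that are themselves PHIs (insert after the PHI group of their block).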