Skip to content

Commit 3890c97

Browse files
authored
[InferAddressSpaces] Fix bad addrspacecast insertion for phinode (#163528)
The IR verifier will carsh if there is any instructions located before phi-node. The `infer-address-spaces` pass would like to insert `addrspacecast` before phi-node in some corner cases. Indeed, since the operand pointer(phi-node's incoming value) has been determined to `NewAS` by the pass, it is safe to `addrspacecast` it immediately after the position where defined it. Co-authored-by: Kerang Mao <[email protected]>
1 parent c021e16 commit 3890c97

File tree

3 files changed

+151
-0
lines changed

3 files changed

+151
-0
lines changed

llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,41 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
617617
return Postorder;
618618
}
619619

620+
// Inserts an addrspacecast for a phi node operand, handling the proper
621+
// insertion position based on the operand type.
622+
static Value *phiNodeOperandWithNewAddressSpace(AddrSpaceCastInst *NewI,
623+
Value *Operand) {
624+
auto InsertBefore = [NewI](auto It) {
625+
NewI->insertBefore(It);
626+
NewI->setDebugLoc(It->getDebugLoc());
627+
return NewI;
628+
};
629+
630+
if (auto *Arg = dyn_cast<Argument>(Operand)) {
631+
// For arguments, insert the cast at the beginning of entry block.
632+
// Consider inserting at the dominating block for better placement.
633+
Function *F = Arg->getParent();
634+
auto InsertI = F->getEntryBlock().getFirstNonPHIIt();
635+
return InsertBefore(InsertI);
636+
}
637+
638+
// No check for Constant here, as constants are already handled.
639+
assert(isa<Instruction>(Operand));
640+
641+
Instruction *OpInst = cast<Instruction>(Operand);
642+
if (LLVM_UNLIKELY(OpInst->getOpcode() == Instruction::PHI)) {
643+
// If the operand is defined by another PHI node, insert after the first
644+
// non-PHI instruction at the corresponding basic block.
645+
auto InsertI = OpInst->getParent()->getFirstNonPHIIt();
646+
return InsertBefore(InsertI);
647+
}
648+
649+
// Otherwise, insert immediately after the operand definition.
650+
NewI->insertAfter(OpInst->getIterator());
651+
NewI->setDebugLoc(OpInst->getDebugLoc());
652+
return NewI;
653+
}
654+
620655
// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
621656
// of OperandUse.get() in the new address space. If the clone is not ready yet,
622657
// returns poison in the new address space as a placeholder.
@@ -642,6 +677,10 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
642677
unsigned NewAS = I->second;
643678
Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
644679
auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
680+
681+
if (LLVM_UNLIKELY(Inst->getOpcode() == Instruction::PHI))
682+
return phiNodeOperandWithNewAddressSpace(NewI, Operand);
683+
645684
NewI->insertBefore(Inst->getIterator());
646685
NewI->setDebugLoc(Inst->getDebugLoc());
647686
return NewI;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -passes='require<domtree>,infer-address-spaces' %s | FileCheck %s
3+
4+
define void @test(ptr %lhs_ptr, ptr %rhs_ptr) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[LHS_PTR:%.*]], ptr [[RHS_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[PTR_1:%.*]] = load ptr, ptr [[LHS_PTR]], align 8
9+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
10+
; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_1]])
11+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]])
12+
; CHECK-NEXT: [[PTR_2:%.*]] = load ptr, ptr [[RHS_PTR]], align 8
13+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR_2]] to ptr addrspace(3)
14+
; CHECK-NEXT: [[BOOL_2:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_2]])
15+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_2]])
16+
; CHECK-NEXT: br i1 poison, label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
17+
; CHECK: [[IF_THEN]]:
18+
; CHECK-NEXT: [[V1:%.*]] = load i32, ptr null, align 4
19+
; CHECK-NEXT: br label %[[IF_SINK_SPLIT:.*]]
20+
; CHECK: [[IF_ELSE]]:
21+
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr null, align 4
22+
; CHECK-NEXT: br label %[[IF_SINK_SPLIT]]
23+
; CHECK: [[IF_SINK_SPLIT]]:
24+
; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP1]], %[[IF_ELSE]] ]
25+
; CHECK-NEXT: [[V_SINK:%.*]] = phi i32 [ [[V1]], %[[IF_THEN]] ], [ [[V2]], %[[IF_ELSE]] ]
26+
; CHECK-NEXT: store i32 [[V_SINK]], ptr addrspace(3) [[PTR_SINK]], align 4
27+
; CHECK-NEXT: ret void
28+
;
29+
entry:
30+
%ptr.1 = load ptr, ptr %lhs_ptr, align 8
31+
%bool.1 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.1)
32+
tail call void @llvm.assume(i1 %bool.1)
33+
34+
%ptr.2 = load ptr, ptr %rhs_ptr, align 8
35+
%bool.2 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.2)
36+
tail call void @llvm.assume(i1 %bool.2)
37+
br i1 poison, label %if.then, label %if.else
38+
39+
if.then: ; preds = %entry
40+
%v1 = load i32, ptr null, align 4
41+
br label %if.sink.split
42+
43+
if.else: ; preds = %entry
44+
%v2 = load i32, ptr null, align 4
45+
br label %if.sink.split
46+
47+
if.sink.split: ; preds = %if.else, %if.then
48+
%ptr.sink = phi ptr [ %ptr.1, %if.then ], [ %ptr.2, %if.else ]
49+
%v.sink = phi i32 [ %v1, %if.then ], [ %v2, %if.else ]
50+
store i32 %v.sink, ptr %ptr.sink, align 4
51+
ret void
52+
}
53+
54+
declare void @llvm.assume(i1 noundef)
55+
declare i1 @llvm.amdgcn.is.shared(ptr)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes='require<domtree>,infer-address-spaces' %s | FileCheck %s
3+
4+
;;; Handle write corner case for infer-address-spaces with phi-nodes. The
5+
;;; verifier will crash if we insert `addrspacecast` before phi-node.
6+
7+
declare void @llvm.assume(i1 noundef)
8+
declare i1 @llvm.nvvm.isspacep.shared(ptr)
9+
declare i1 @llvm.nvvm.isspacep.global(ptr)
10+
11+
define void @phinode_instr() {
12+
; CHECK-LABEL: @phinode_instr(
13+
; CHECK-NEXT: entry:
14+
; CHECK-NEXT: [[PTR_1:%.*]] = load ptr, ptr null, align 8
15+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3)
16+
; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[PTR_1]])
17+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]])
18+
; CHECK-NEXT: br label [[IF_SINK_SPLIT:%.*]]
19+
; CHECK: if.sink.split:
20+
; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], [[ENTRY:%.*]] ]
21+
; CHECK-NEXT: store i32 1, ptr addrspace(3) [[PTR_SINK]], align 4
22+
; CHECK-NEXT: ret void
23+
;
24+
entry:
25+
%ptr.1 = load ptr, ptr null, align 8
26+
%bool.1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %ptr.1)
27+
tail call void @llvm.assume(i1 %bool.1)
28+
br label %if.sink.split
29+
30+
if.sink.split: ; preds = %entry
31+
%ptr.sink = phi ptr [ %ptr.1, %entry ]
32+
store i32 1, ptr %ptr.sink, align 4
33+
ret void
34+
}
35+
36+
define void @phinode_argument(ptr %lhs_ptr) {
37+
; CHECK-LABEL: @phinode_argument(
38+
; CHECK-NEXT: entry:
39+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[LHS_PTR:%.*]] to ptr addrspace(1)
40+
; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[LHS_PTR]])
41+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]])
42+
; CHECK-NEXT: br label [[IF_SINK_SPLIT:%.*]]
43+
; CHECK: if.sink.split:
44+
; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(1) [ [[TMP0]], [[ENTRY:%.*]] ]
45+
; CHECK-NEXT: store i32 1, ptr addrspace(1) [[PTR_SINK]], align 4
46+
; CHECK-NEXT: ret void
47+
;
48+
entry:
49+
%bool.1 = tail call i1 @llvm.nvvm.isspacep.global(ptr %lhs_ptr)
50+
tail call void @llvm.assume(i1 %bool.1)
51+
br label %if.sink.split
52+
53+
if.sink.split: ; preds = %entry
54+
%ptr.sink = phi ptr [ %lhs_ptr, %entry ]
55+
store i32 1, ptr %ptr.sink, align 4
56+
ret void
57+
}

0 commit comments

Comments
 (0)