Skip to content

Commit 8142cf3

Browse files
committed
[AMDGPU] Visit all PHIs in each call to optimizeLiveType
Make the Visited set a local variable of optimizeLiveType instead of a class member. Otherwise, we can reject a PHI (one that has a constant incoming value other than zeroinitializer) but still mark it as visited, and the rest of the function then thinks the PHI is OK when it isn't. This is a bit crude, but it's the only fix that consistently worked in my testing. Fixes SWDEV-541767.
1 parent cd75c2f commit 8142cf3

File tree

2 files changed

+95
-2
lines changed

2 files changed

+95
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,6 @@ class LiveRegOptimizer {
7979

8080
/// The scalar type to convert to
8181
Type *const ConvertToScalar;
82-
/// The set of visited Instructions
83-
SmallPtrSet<Instruction *, 4> Visited;
8482
/// Map of Value -> Converted Value
8583
ValueToValueMap ValMap;
8684
/// Map containing conversions from Optimal Type -> Original Type per BB.
@@ -288,6 +286,7 @@ bool LiveRegOptimizer::optimizeLiveType(
288286
SmallPtrSet<PHINode *, 4> PhiNodes;
289287
SmallPtrSet<Instruction *, 4> Defs;
290288
SmallPtrSet<Instruction *, 4> Uses;
289+
SmallPtrSet<Instruction *, 4> Visited;
291290

292291
Worklist.push_back(cast<Instruction>(I));
293292
while (!Worklist.empty()) {
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes="amdgpu-late-codegenprepare,verify" %s | FileCheck %s
3+
4+
; This crashed because the PHI with a splat was rejected, but then we marked the PHI
5+
; as visited and tried to convert one of its users afterwards.
6+
7+
define amdgpu_kernel void @widget(i1 %arg, <4 x i8> %arg1, i64 %arg2) {
8+
; CHECK-LABEL: define amdgpu_kernel void @widget(
9+
; CHECK-SAME: i1 [[ARG:%.*]], <4 x i8> [[ARG1:%.*]], i64 [[ARG2:%.*]]) {
10+
; CHECK-NEXT: [[BB:.*]]:
11+
; CHECK-NEXT: [[WIDGET_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
12+
; CHECK-NEXT: [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 36
13+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4
14+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
15+
; CHECK-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 40
16+
; CHECK-NEXT: [[ARG1_LOAD:%.*]] = load <4 x i8>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 8
17+
; CHECK-NEXT: [[ARG2_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[WIDGET_KERNARG_SEGMENT]], i64 44
18+
; CHECK-NEXT: [[ARG2_LOAD:%.*]] = load i64, ptr addrspace(4) [[ARG2_KERNARG_OFFSET]], align 4
19+
; CHECK-NEXT: br label %[[BB_3:.*]]
20+
; CHECK: [[BB_3]]:
21+
; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(1) [ null, %[[BB]] ], [ [[GETELEMENTPTR:%.*]], %[[BB_14:.*]] ]
22+
; CHECK-NEXT: [[PHI4:%.*]] = phi <4 x i8> [ splat (i8 1), %[[BB]] ], [ [[PHI15:%.*]], %[[BB_14]] ]
23+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_6_PREHEADER:.*]], label %[[BB_5:.*]]
24+
; CHECK: [[BB_5]]:
25+
; CHECK-NEXT: br label %[[BB_14]]
26+
; CHECK: [[BB_6_PREHEADER]]:
27+
; CHECK-NEXT: br label %[[BB_6:.*]]
28+
; CHECK: [[BB_6]]:
29+
; CHECK-NEXT: [[PHI7:%.*]] = phi <4 x i8> [ [[PHI13:%.*]], %[[BB_12:.*]] ], [ [[PHI4]], %[[BB_6_PREHEADER]] ]
30+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_8:.*]], label %[[BB_12]]
31+
; CHECK: [[BB_8]]:
32+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_10:.*]], label %[[BB_9:.*]]
33+
; CHECK: [[BB_9]]:
34+
; CHECK-NEXT: br label %[[BB_10]]
35+
; CHECK: [[BB_10]]:
36+
; CHECK-NEXT: [[PHI11:%.*]] = phi <4 x i8> [ [[PHI7]], %[[BB_9]] ], [ zeroinitializer, %[[BB_8]] ]
37+
; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x i8> [[PHI11]], i64 0
38+
; CHECK-NEXT: store i8 [[EXTRACTELEMENT]], ptr addrspace(1) [[PHI]], align 1
39+
; CHECK-NEXT: br label %[[BB_12]]
40+
; CHECK: [[BB_12]]:
41+
; CHECK-NEXT: [[PHI13]] = phi <4 x i8> [ zeroinitializer, %[[BB_10]] ], [ [[PHI7]], %[[BB_6]] ]
42+
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB_6]], label %[[BB_14]]
43+
; CHECK: [[BB_14]]:
44+
; CHECK-NEXT: [[PHI15]] = phi <4 x i8> [ [[ARG1_LOAD]], %[[BB_5]] ], [ zeroinitializer, %[[BB_12]] ]
45+
; CHECK-NEXT: [[GETELEMENTPTR]] = getelementptr i8, ptr addrspace(1) [[PHI]], i64 [[ARG2_LOAD]]
46+
; CHECK-NEXT: br label %[[BB_3]]
47+
;
48+
bb:
49+
%widget.kernarg.segment = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
50+
%arg.kernarg.offset.align.down = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 36
51+
%0 = load i32, ptr addrspace(4) %arg.kernarg.offset.align.down, align 4
52+
%1 = trunc i32 %0 to i1
53+
%arg1.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 40
54+
%arg1.load = load <4 x i8>, ptr addrspace(4) %arg1.kernarg.offset, align 8
55+
%arg2.kernarg.offset = getelementptr inbounds i8, ptr addrspace(4) %widget.kernarg.segment, i64 44
56+
%arg2.load = load i64, ptr addrspace(4) %arg2.kernarg.offset, align 4
57+
br label %bb.3
58+
59+
bb.3: ; preds = %bb.14, %bb
60+
%phi = phi ptr addrspace(1) [ null, %bb ], [ %getelementptr, %bb.14 ]
61+
%phi4 = phi <4 x i8> [ splat (i8 1), %bb ], [ %phi15, %bb.14 ]
62+
br i1 %1, label %bb.6.preheader, label %bb.5
63+
64+
bb.5: ; preds = %bb.3
65+
br label %bb.14
66+
67+
bb.6.preheader: ; preds = %bb.3
68+
br label %bb.6
69+
70+
bb.6: ; preds = %bb.6.preheader, %bb.12
71+
%phi7 = phi <4 x i8> [ %phi13, %bb.12 ], [ %phi4, %bb.6.preheader ]
72+
br i1 %1, label %bb.8, label %bb.12
73+
74+
bb.8: ; preds = %bb.6
75+
br i1 %1, label %bb.10, label %bb.9
76+
77+
bb.9: ; preds = %bb.8
78+
br label %bb.10
79+
80+
bb.10: ; preds = %bb.9, %bb.8
81+
%phi11 = phi <4 x i8> [ %phi7, %bb.9 ], [ zeroinitializer, %bb.8 ]
82+
%extractelement = extractelement <4 x i8> %phi11, i64 0
83+
store i8 %extractelement, ptr addrspace(1) %phi, align 1
84+
br label %bb.12
85+
86+
bb.12: ; preds = %bb.10, %bb.6
87+
%phi13 = phi <4 x i8> [ zeroinitializer, %bb.10 ], [ %phi7, %bb.6 ]
88+
br i1 %1, label %bb.6, label %bb.14
89+
90+
bb.14: ; preds = %bb.5, %bb.12
91+
%phi15 = phi <4 x i8> [ %arg1.load, %bb.5 ], [ zeroinitializer, %bb.12 ]
92+
%getelementptr = getelementptr i8, ptr addrspace(1) %phi, i64 %arg2.load
93+
br label %bb.3
94+
}

0 commit comments

Comments
 (0)