Skip to content

Commit 617854f

Browse files
[AMDGPU] LRO: allow same-BB non-lookthrough users for PHI (#160909)
Loop headers frequently consume the loop-carried value in the header block via non-lookthrough ops (e.g. byte-wise vector binops). LiveRegOptimizer’s same-BB filter currently prunes these users, so the loop-carried PHI is not coerced to i32 and the intended packed form is lost. Relax the filter: when the def is a PHI, allow same-BB non-lookthrough users. Also fix the check to look at the user (CII) rather than the def (II) so the walk does not terminate prematurely.
1 parent 2e3f252 commit 617854f

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,10 @@ class LiveRegOptimizer {
150150
if (!CVisited.insert(CII).second)
151151
continue;
152152

153-
if (CII->getParent() == II->getParent() && !IsLookThru(II))
153+
// The same-BB filter must test the *user* (CII), not the def (II); also
154+
// allow non-lookthrough users when the def is a PHI (loop-header pattern).
155+
if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
156+
!isa<PHINode>(II))
154157
continue;
155158

156159
if (isOpLegal(CII))
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; RUN: opt -S -passes=amdgpu-late-codegenprepare \
2+
; RUN: -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s | FileCheck %s
3+
4+
; Goal: With a loop-header PHI of an illegal vector type and a same-BB
5+
; non-lookthrough user (vector add) in the header, LRO should still coerce
6+
; the PHI to i32 because a profitable sink (store) exists in another BB.
7+
8+
define amdgpu_kernel void @phi_samebb_nonlookthrough_store(
9+
ptr addrspace(1) %out, <4 x i8> %v, i1 %exit) {
10+
; CHECK-LABEL: @phi_samebb_nonlookthrough_store(
11+
entry:
12+
br label %loop
13+
14+
loop: ; preds = %entry, %loop
15+
; Loop-carried PHI of an illegal vector type.
16+
%acc = phi <4 x i8> [ zeroinitializer, %entry ], [ %acc.next, %loop ]
17+
18+
; Same-BB non-lookthrough use in header.
19+
%acc.next = add <4 x i8> %acc, %v
20+
21+
; Make it a real loop: either iterate or exit to the sink block.
22+
br i1 %exit, label %store, label %loop
23+
24+
store: ; preds = %loop
25+
; The cross-BB sink: bitcast the PHI value to i32 and store it.
26+
%acc.bc = bitcast <4 x i8> %acc to i32
27+
store i32 %acc.bc, ptr addrspace(1) %out, align 4
28+
ret void
29+
}
30+
31+
; After AMDGPULateCodeGenPrepare we expect:
32+
; - PHI is coerced to i32
33+
; - A header bitcast materializes for the add
34+
; This proves the same-BB non-lookthrough user (add) did not get pruned
35+
; when the def is a PHI.
36+
37+
; CHECK: loop:
38+
; CHECK: %[[ACC_TC:[^ ]+]] = phi i32
39+
; CHECK: %[[ACC_TC_BC:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
40+
; CHECK: %[[ACC_NEXT:[^ ]+]] = add <4 x i8> %[[ACC_TC_BC]], %v
41+
; CHECK: br i1 %exit, label %store, label %loop
42+
; CHECK: store:
43+
; CHECK: %[[ACC_TC_BC2:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
44+
; CHECK: %[[ST_I32:[^ ]+]] = bitcast <4 x i8> %[[ACC_TC_BC2]] to i32
45+
; CHECK: store i32 %[[ST_I32]],
46+

0 commit comments

Comments
 (0)