Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,10 @@ class LiveRegOptimizer {
if (!CVisited.insert(CII).second)
continue;

if (CII->getParent() == II->getParent() && !IsLookThru(II))
// Same-BB filter must look at the *user*; and allow non-lookthrough
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should have test

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a minimal IR test (lro-phi-samebb-nonlookthrough-store.ll) that checks the same-BB filter looks at the user and allows non-lookthrough when the def is a PHI

// users when the def is a PHI (loop-header pattern).
if (CII->getParent() == II->getParent() && !IsLookThru(CII) &&
!isa<PHINode>(II))
continue;

if (isOpLegal(CII))
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/AMDGPU/lro-phi-samebb-nonlookthrough-store.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; RUN: opt -S -passes=amdgpu-late-codegenprepare \
; RUN: -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s | FileCheck %s

; Goal: With a loop-header PHI in illegal vector type and a same-BB
; non-lookthrough user (vector add) in the header, LRO should still coerce
; the PHI to i32 because a profitable sink (store) exists across BB.

define amdgpu_kernel void @phi_samebb_nonlookthrough_store(
ptr addrspace(1) %out, <4 x i8> %v, i1 %exit) {
; CHECK-LABEL: @phi_samebb_nonlookthrough_store(
entry:
br label %loop

loop: ; preds = %entry, %loop
; Loop-carried PHI in illegal vector type.
%acc = phi <4 x i8> [ zeroinitializer, %entry ], [ %acc.next, %loop ]

; Same-BB non-lookthrough use in header.
%acc.next = add <4 x i8> %acc, %v

; Make it a real loop: either iterate or exit to the sink block.
br i1 %exit, label %store, label %loop

store: ; preds = %loop
; The across-BB sink: storing the PHI coerced to i32.
%acc.bc = bitcast <4 x i8> %acc to i32
store i32 %acc.bc, ptr addrspace(1) %out, align 4
ret void
}

; After AMDGPULateCodeGenPrepare we expect:
; - PHI is coerced to i32
; - A header bitcast materializes for the add
; This proves the same-BB non-lookthrough user (add) did not get pruned
; when the def is a PHI.

; CHECK: loop:
; CHECK: %[[ACC_TC:[^ ]+]] = phi i32
; CHECK: %[[ACC_TC_BC:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
; CHECK: %[[ACC_NEXT:[^ ]+]] = add <4 x i8> %[[ACC_TC_BC]], %v
; CHECK: br i1 %exit, label %store, label %loop
; CHECK: store:
; CHECK: %[[ACC_TC_BC2:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8>
; CHECK: %[[ST_I32:[^ ]+]] = bitcast <4 x i8> %[[ACC_TC_BC2]] to i32
; CHECK: store i32 %[[ST_I32]],