|
| 1 | +; RUN: opt -S -passes=amdgpu-late-codegenprepare \ |
| 2 | +; RUN: -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s | FileCheck %s |
| 3 | + |
| 4 | +; Goal: given a loop-header PHI of an illegal vector type with a same-BB |
| 5 | +; non-lookthrough user (vector add) in the header, the LiveRegOptimizer (LRO) should still coerce |
| 6 | +; the PHI to i32 because a profitable sink (store) exists in another BB. |
| 7 | + |
| 8 | +define amdgpu_kernel void @phi_samebb_nonlookthrough_store( |
| 9 | + ptr addrspace(1) %out, <4 x i8> %v, i1 %exit) { |
| 10 | +; CHECK-LABEL: @phi_samebb_nonlookthrough_store( |
| 11 | +entry: |
| 12 | + br label %loop |
| 13 | + |
| 14 | +loop: ; preds = %entry, %loop |
| 15 | + ; Loop-carried PHI of the illegal vector type <4 x i8>: zero on entry, %acc.next on the back edge. |
| 16 | + %acc = phi <4 x i8> [ zeroinitializer, %entry ], [ %acc.next, %loop ] |
| 17 | + |
| 18 | + ; Same-BB non-lookthrough user of the PHI in the loop header (a vector add, not a cast). |
| 19 | + %acc.next = add <4 x i8> %acc, %v |
| 20 | + |
| 21 | + ; Make it a real loop: %exit selects between the sink block (%store) and the back edge (%loop). |
| 22 | + br i1 %exit, label %store, label %loop |
| 23 | + |
| 24 | +store: ; preds = %loop |
| 25 | + ; The cross-BB sink: the PHI value is bitcast to i32 here and stored to %out. |
| 26 | + %acc.bc = bitcast <4 x i8> %acc to i32 |
| 27 | + store i32 %acc.bc, ptr addrspace(1) %out, align 4 |
| 28 | + ret void |
| 29 | +} |
| 30 | + |
| 31 | +; After AMDGPULateCodeGenPrepare we expect: |
| 32 | +; - The PHI is coerced to i32. |
| 33 | +; - A bitcast back to <4 x i8> is materialized in the header for the add, and another in the store block. |
| 34 | +; This shows that the same-BB non-lookthrough user (add) is not pruned |
| 35 | +; when the def is a PHI. |
| 36 | + |
| 37 | +; CHECK: loop: |
| 38 | +; CHECK: %[[ACC_TC:[^ ]+]] = phi i32 |
| 39 | +; CHECK: %[[ACC_TC_BC:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8> |
| 40 | +; CHECK: %[[ACC_NEXT:[^ ]+]] = add <4 x i8> %[[ACC_TC_BC]], %v |
| 41 | +; CHECK: br i1 %exit, label %store, label %loop |
| 42 | +; CHECK: store: |
| 43 | +; CHECK: %[[ACC_TC_BC2:[^ ]+]] = bitcast i32 %[[ACC_TC]] to <4 x i8> |
| 44 | +; CHECK: %[[ST_I32:[^ ]+]] = bitcast <4 x i8> %[[ACC_TC_BC2]] to i32 |
| 45 | +; CHECK: store i32 %[[ST_I32]], |
| 46 | + |
0 commit comments