Skip to content

Commit d32f060

Browse files
committed
[AMDGPU] Add test that demonstrates si-peephole-sdwa failure on V_CNDMASK
1 parent 292cfa7 commit d32f060

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; RUN: llc %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -o - 2>&1 | FileCheck %s
2+
; XFAIL: *
3+
4+
; V_CNDMASK_B32_e64 gets converted to V_CNDMASK_B32_e32, but the
5+
; expected conversion to SDWA does not occur. FIXME: This leads to a
6+
; compilation error, because the use of $vcc in the resulting
7+
; instruction must be fixed to $vcc_lo for wave32. That fix-up only happens
8+
; after the full conversion to SDWA.
9+
10+
11+
; CHECK-NOT: {{.*}}V_CNDMASK_B32_e32{{.*}}$vcc
12+
; CHECK-NOT: {{.*}}Bad machine code: Virtual register defs don't dominate all uses
13+
14+
; ModuleID = 'test.ll'
15+
source_filename = "test.ll"
16+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
17+
target triple = "amdgcn-amd-amdhsa"
18+
19+
define amdgpu_kernel void @quux(i32 %arg, i1 %arg1, i1 %arg2) #0 { ; kernel that triggers the failure described at the top of this test
20+
bb:
21+
br i1 %arg1, label %bb9, label %bb3 ; %arg1 true bypasses bb3 entirely
22+
23+
bb3: ; preds = %bb
24+
%call = tail call i32 @llvm.amdgcn.workitem.id.x()
25+
%mul = mul i32 %call, 5 ; byte offset = workitem id * 5
26+
%zext = zext i32 %mul to i64
27+
%getelementptr = getelementptr i8, ptr addrspace(1) null, i64 %zext ; base pointer is null in addrspace(1)
28+
%getelementptr4 = getelementptr i8, ptr addrspace(1) %getelementptr, i64 4
29+
%load = load i8, ptr addrspace(1) %getelementptr4, align 1 ; byte at offset 4 from %getelementptr
30+
%getelementptr5 = getelementptr i8, ptr addrspace(1) %getelementptr, i64 3
31+
%load6 = load i8, ptr addrspace(1) %getelementptr5, align 1 ; byte at offset 3 from %getelementptr
32+
%insertelement = insertelement <5 x i8> poison, i8 %load, i64 4
33+
%select = select i1 %arg2, <5 x i8> %insertelement, <5 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 0> ; only lane 4 is non-poison in either operand
34+
%insertelement7 = insertelement <5 x i8> %select, i8 %load6, i64 0
35+
%icmp = icmp ult i32 0, %arg ; unsigned 0 < %arg, i.e. %arg != 0
36+
%select8 = select i1 %icmp, <5 x i8> zeroinitializer, <5 x i8> %insertelement7
37+
%shufflevector = shufflevector <5 x i8> zeroinitializer, <5 x i8> %select8, <5 x i32> <i32 0, i32 1, i32 7, i32 8, i32 9> ; lanes 0-1 from zeroinitializer, lanes 2-4 from lanes 2-4 of %select8
38+
br label %bb9
39+
40+
bb9: ; preds = %bb3, %bb
41+
%phi = phi <5 x i8> [ %shufflevector, %bb3 ], [ zeroinitializer, %bb ]
42+
%extractelement = extractelement <5 x i8> %phi, i64 0 ; lane 0 comes from zeroinitializer on both incoming paths
43+
store i8 %extractelement, ptr addrspace(1) null, align 1 ; store target is the null addrspace(1) pointer
44+
ret void
45+
}
46+
47+
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
48+
declare noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() #1
49+
50+
attributes #0 = { "target-cpu"="gfx1030" }
51+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1030" }

0 commit comments

Comments
 (0)