Skip to content

Commit e025e5e

Browse files
committed
[AMDGPU] Reland generated test fix-wwm-vgpr-copy.ll (NFC)
Fix issues left over after generation.
1 parent 8da3852 commit e025e5e

File tree

1 file changed

+55
-14
lines changed

1 file changed

+55
-14
lines changed

llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll

Lines changed: 55 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
23

34
; NOTE: llvm.amdgcn.wwm is deprecated, use llvm.amdgcn.strict.wwm instead.
45

5-
; GCN-LABEL: wwm:
66
define amdgpu_hs void @wwm(i32 inreg %arg, ptr addrspace(8) inreg %buffer) {
7+
; GCN-LABEL: wwm:
8+
; GCN: ; %bb.0: ; %entry
9+
; GCN-NEXT: s_mov_b32 s7, s4
10+
; GCN-NEXT: s_mov_b32 s6, s3
11+
; GCN-NEXT: s_mov_b32 s5, s2
12+
; GCN-NEXT: s_mov_b32 s4, s1
13+
; GCN-NEXT: s_mov_b32 s1, 1
14+
; GCN-NEXT: v_mov_b32_e32 v0, 4
15+
; GCN-NEXT: s_not_b64 exec, exec
16+
; GCN-NEXT: v_mov_b32_e32 v0, 1
17+
; GCN-NEXT: s_not_b64 exec, exec
18+
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
19+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
20+
; GCN-NEXT: s_mov_b64 exec, s[2:3]
21+
; GCN-NEXT: s_cmp_lg_u32 s0, 0
22+
; GCN-NEXT: v_mov_b32_e32 v1, v0
23+
; GCN-NEXT: s_cbranch_scc0 .LBB0_2
24+
; GCN-NEXT: ; %bb.1: ; %bb42
25+
; GCN-NEXT: s_mov_b32 s1, 0
26+
; GCN-NEXT: .LBB0_2: ; %bb602
27+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
28+
; GCN-NEXT: s_cbranch_vccnz .LBB0_4
29+
; GCN-NEXT: ; %bb.3: ; %bb49
30+
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
31+
; GCN-NEXT: tbuffer_store_format_x v1, off, s[4:7], 1 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] offset:4 glc
32+
; GCN-NEXT: .LBB0_4: ; %bb54
33+
; GCN-NEXT: s_endpgm
734
entry:
835
br label %work
936

@@ -23,24 +50,44 @@ bb54:
2350
ret void
2451

2552
work:
26-
; GCN: s_not_b64 exec, exec
27-
; GCN: v_mov_b32_e32 v[[tmp1189:[0-9]+]], 1
28-
; GCN: s_not_b64 exec, exec
2953
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 4, i32 1)
3054

31-
; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
32-
; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
3355
%tmp1191 = mul i32 %tmp1189, 4
3456

35-
; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
3657
%tmp1196 = tail call i32 @llvm.amdgcn.wwm.i32(i32 %tmp1191)
3758

3859
%tmp34 = icmp eq i32 %arg, 0
3960
br i1 %tmp34, label %bb602, label %bb42
4061
}
4162

42-
; GCN-LABEL: strict_wwm:
4363
define amdgpu_hs void @strict_wwm(i32 inreg %arg, ptr addrspace(8) inreg %buffer) {
64+
; GCN-LABEL: strict_wwm:
65+
; GCN: ; %bb.0: ; %entry
66+
; GCN-NEXT: s_mov_b32 s7, s4
67+
; GCN-NEXT: s_mov_b32 s6, s3
68+
; GCN-NEXT: s_mov_b32 s5, s2
69+
; GCN-NEXT: s_mov_b32 s4, s1
70+
; GCN-NEXT: s_mov_b32 s1, 1
71+
; GCN-NEXT: v_mov_b32_e32 v0, 4
72+
; GCN-NEXT: s_not_b64 exec, exec
73+
; GCN-NEXT: v_mov_b32_e32 v0, 1
74+
; GCN-NEXT: s_not_b64 exec, exec
75+
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
76+
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
77+
; GCN-NEXT: s_mov_b64 exec, s[2:3]
78+
; GCN-NEXT: s_cmp_lg_u32 s0, 0
79+
; GCN-NEXT: v_mov_b32_e32 v1, v0
80+
; GCN-NEXT: s_cbranch_scc0 .LBB1_2
81+
; GCN-NEXT: ; %bb.1: ; %bb42
82+
; GCN-NEXT: s_mov_b32 s1, 0
83+
; GCN-NEXT: .LBB1_2: ; %bb602
84+
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
85+
; GCN-NEXT: s_cbranch_vccnz .LBB1_4
86+
; GCN-NEXT: ; %bb.3: ; %bb49
87+
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
88+
; GCN-NEXT: tbuffer_store_format_x v1, off, s[4:7], 1 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] offset:4 glc
89+
; GCN-NEXT: .LBB1_4: ; %bb54
90+
; GCN-NEXT: s_endpgm
4491
entry:
4592
br label %work
4693

@@ -60,16 +107,10 @@ bb54:
60107
ret void
61108

62109
work:
63-
; GCN: s_not_b64 exec, exec
64-
; GCN: v_mov_b32_e32 v[[tmp1189:[0-9]+]], 1
65-
; GCN: s_not_b64 exec, exec
66110
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 4, i32 1)
67111

68-
; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
69-
; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
70112
%tmp1191 = mul i32 %tmp1189, 4
71113

72-
; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
73114
%tmp1196 = tail call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp1191)
74115

75116
%tmp34 = icmp eq i32 %arg, 0

0 commit comments

Comments (0)