1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2
3
3
4
; NOTE: llvm.amdgcn.wwm is deprecated, use llvm.amdgcn.strict.wwm instead.
4
5
5
- ; GCN-LABEL: wwm:
6
6
; Codegen test for the llvm.amdgcn.wwm intrinsic: an amdgpu_hs function taking
; an inreg i32 (%arg) and an inreg buffer pointer in addrspace(8) (%buffer).
; Only the entry and work blocks (plus the tail of bb54) are visible in this
; excerpt; the remaining blocks are elided, so they are not described here.
define amdgpu_hs void @wwm (i32 inreg %arg , ptr addrspace (8 ) inreg %buffer ) {
7
+ ; GCN-LABEL: wwm:
8
+ ; GCN: ; %bb.0: ; %entry
9
+ ; GCN-NEXT: s_mov_b32 s7, s4
10
+ ; GCN-NEXT: s_mov_b32 s6, s3
11
+ ; GCN-NEXT: s_mov_b32 s5, s2
12
+ ; GCN-NEXT: s_mov_b32 s4, s1
13
+ ; GCN-NEXT: s_mov_b32 s1, 1
14
+ ; GCN-NEXT: v_mov_b32_e32 v0, 4
15
+ ; GCN-NEXT: s_not_b64 exec, exec
16
+ ; GCN-NEXT: v_mov_b32_e32 v0, 1
17
+ ; GCN-NEXT: s_not_b64 exec, exec
18
+ ; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
19
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
20
+ ; GCN-NEXT: s_mov_b64 exec, s[2:3]
21
+ ; GCN-NEXT: s_cmp_lg_u32 s0, 0
22
+ ; GCN-NEXT: v_mov_b32_e32 v1, v0
23
+ ; GCN-NEXT: s_cbranch_scc0 .LBB0_2
24
+ ; GCN-NEXT: ; %bb.1: ; %bb42
25
+ ; GCN-NEXT: s_mov_b32 s1, 0
26
+ ; GCN-NEXT: .LBB0_2: ; %bb602
27
+ ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
28
+ ; GCN-NEXT: s_cbranch_vccnz .LBB0_4
29
+ ; GCN-NEXT: ; %bb.3: ; %bb49
30
+ ; GCN-NEXT: v_mov_b32_e32 v1, 1.0
31
+ ; GCN-NEXT: tbuffer_store_format_x v1, off, s[4:7], 1 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] offset:4 glc
32
+ ; GCN-NEXT: .LBB0_4: ; %bb54
33
+ ; GCN-NEXT: s_endpgm
7
34
entry:
8
35
br label %work
9
36
@@ -23,24 +50,44 @@ bb54:
23
50
ret void
24
51
25
52
work:
26
- ; GCN: s_not_b64 exec, exec
27
- ; GCN: v_mov_b32_e32 v[[tmp1189:[0-9]+]], 1
28
- ; GCN: s_not_b64 exec, exec
29
53
; set.inactive with operands 4 and 1. In the checked output this appears as
; a write of 4, an s_not_b64 of exec, a write of 1, and a second s_not_b64
; that flips exec back.
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32 (i32 4 , i32 1 )
30
54
31
- ; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
32
- ; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
33
55
; The multiply by 4 is checked as a left shift by 2, placed between
; s_or_saveexec_b64 (exec saved, then OR'ed with -1) and the s_mov_b64 that
; restores exec.
%tmp1191 = mul i32 %tmp1189 , 4
34
56
35
- ; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
36
57
; The shifted value is passed through the wwm intrinsic under test.
%tmp1196 = tail call i32 @llvm.amdgcn.wwm.i32 (i32 %tmp1191 )
37
58
38
59
; %arg == 0 branches to bb602; any other value branches to bb42.
%tmp34 = icmp eq i32 %arg , 0
39
60
br i1 %tmp34 , label %bb602 , label %bb42
40
61
}
41
62
42
- ; GCN-LABEL: strict_wwm:
43
63
define amdgpu_hs void @strict_wwm (i32 inreg %arg , ptr addrspace (8 ) inreg %buffer ) {
64
+ ; GCN-LABEL: strict_wwm:
65
+ ; GCN: ; %bb.0: ; %entry
66
+ ; GCN-NEXT: s_mov_b32 s7, s4
67
+ ; GCN-NEXT: s_mov_b32 s6, s3
68
+ ; GCN-NEXT: s_mov_b32 s5, s2
69
+ ; GCN-NEXT: s_mov_b32 s4, s1
70
+ ; GCN-NEXT: s_mov_b32 s1, 1
71
+ ; GCN-NEXT: v_mov_b32_e32 v0, 4
72
+ ; GCN-NEXT: s_not_b64 exec, exec
73
+ ; GCN-NEXT: v_mov_b32_e32 v0, 1
74
+ ; GCN-NEXT: s_not_b64 exec, exec
75
+ ; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
76
+ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
77
+ ; GCN-NEXT: s_mov_b64 exec, s[2:3]
78
+ ; GCN-NEXT: s_cmp_lg_u32 s0, 0
79
+ ; GCN-NEXT: v_mov_b32_e32 v1, v0
80
+ ; GCN-NEXT: s_cbranch_scc0 .LBB1_2
81
+ ; GCN-NEXT: ; %bb.1: ; %bb42
82
+ ; GCN-NEXT: s_mov_b32 s1, 0
83
+ ; GCN-NEXT: .LBB1_2: ; %bb602
84
+ ; GCN-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
85
+ ; GCN-NEXT: s_cbranch_vccnz .LBB1_4
86
+ ; GCN-NEXT: ; %bb.3: ; %bb49
87
+ ; GCN-NEXT: v_mov_b32_e32 v1, 1.0
88
+ ; GCN-NEXT: tbuffer_store_format_x v1, off, s[4:7], 1 format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] offset:4 glc
89
+ ; GCN-NEXT: .LBB1_4: ; %bb54
90
+ ; GCN-NEXT: s_endpgm
44
91
entry:
45
92
br label %work
46
93
@@ -60,16 +107,10 @@ bb54:
60
107
ret void
61
108
62
109
work:
63
- ; GCN: s_not_b64 exec, exec
64
- ; GCN: v_mov_b32_e32 v[[tmp1189:[0-9]+]], 1
65
- ; GCN: s_not_b64 exec, exec
66
110
%tmp1189 = tail call i32 @llvm.amdgcn.set.inactive.i32 (i32 4 , i32 1 )
67
111
68
- ; GCN: s_or_saveexec_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], -1
69
- ; GCN: v_lshlrev_b32_e32 v[[tmp1191:[0-9]+]], 2, v[[tmp1189]]
70
112
%tmp1191 = mul i32 %tmp1189 , 4
71
113
72
- ; GCN: s_mov_b64 exec, s[[[LO]]:[[HI]]]
73
114
%tmp1196 = tail call i32 @llvm.amdgcn.strict.wwm.i32 (i32 %tmp1191 )
74
115
75
116
%tmp34 = icmp eq i32 %arg , 0
0 commit comments