@@ -40,7 +40,64 @@ define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
40
40
ret void
41
41
}
42
42
43
; set_inactive_scc - a scalar compare whose result is consumed only after a
; call to llvm.amdgcn.set.inactive.
;
; IR shape:
;   * one dword is read through the inreg descriptor %desc (offset 0) and
;     compared for equality with 56;
;   * set.inactive is called with %in as first operand and 42 as second;
;   * the branch on the compare comes after that call. The true edge (.zero)
;     stores the set.inactive result to %out, the false edge (.one) stores the
;     result plus 1.
;
; Expected ISA shape (the assertion rows below):
;   * the s_cmp_lg_u32 outcome is copied into s0 with s_cselect_b32 before the
;     pair of "s_not_b64 exec, exec" instructions that surround the
;     "v_mov_b32 v0, 42";
;   * it is re-tested from s0 (s_and_b32 + s_cmp_lg_u32) right before the
;     first conditional branch.
;
; NOTE(review) judging by the test name, the point is that the compare result
; must survive the exec inversions - presumably because s_not_b64 overwrites
; SCC. Confirm against the ISA manual.
; NOTE(review) the assertion rows look machine generated; regenerate them with
; the project's tooling instead of hand-editing - TODO confirm.
+ define amdgpu_kernel void @set_inactive_scc (i32 addrspace (1 )* %out , i32 %in , <4 x i32 > inreg %desc ) {
44
+ ; GCN-LABEL: set_inactive_scc:
45
+ ; GCN: ; %bb.0:
46
+ ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
47
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
48
+ ; GCN-NEXT: s_buffer_load_dword s2, s[4:7], 0x0
49
+ ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
50
+ ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
51
+ ; GCN-NEXT: s_waitcnt lgkmcnt(0)
52
+ ; GCN-NEXT: s_cmp_lg_u32 s2, 56
53
+ ; GCN-NEXT: v_mov_b32_e32 v0, s0
54
+ ; GCN-NEXT: s_cselect_b32 s0, 1, 0
55
+ ; GCN-NEXT: s_not_b64 exec, exec
56
+ ; GCN-NEXT: v_mov_b32_e32 v0, 42
57
+ ; GCN-NEXT: s_not_b64 exec, exec
58
+ ; GCN-NEXT: s_and_b32 s0, s0, 1
59
+ ; GCN-NEXT: s_cmp_lg_u32 s0, 0
60
+ ; GCN-NEXT: s_cbranch_scc0 BB2_2
61
+ ; GCN-NEXT: ; %bb.1: ; %.one
62
+ ; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0
63
+ ; GCN-NEXT: s_mov_b32 s6, -1
64
+ ; GCN-NEXT: s_mov_b32 s7, 0xf000
65
+ ; GCN-NEXT: s_mov_b32 s0, 0
66
+ ; GCN-NEXT: buffer_store_dword v1, off, s[4:7], 0
67
+ ; GCN-NEXT: s_branch BB2_3
68
+ ; GCN-NEXT: BB2_2:
69
+ ; GCN-NEXT: s_mov_b32 s0, -1
70
+ ; GCN-NEXT: BB2_3: ; %Flow
71
+ ; GCN-NEXT: s_xor_b32 s0, s0, -1
72
+ ; GCN-NEXT: s_and_b32 s0, s0, 1
73
+ ; GCN-NEXT: s_cmp_lg_u32 s0, 0
74
+ ; GCN-NEXT: s_cbranch_scc1 BB2_5
75
+ ; GCN-NEXT: ; %bb.4: ; %.zero
76
+ ; GCN-NEXT: s_mov_b32 s6, -1
77
+ ; GCN-NEXT: s_mov_b32 s7, 0xf000
78
+ ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
79
+ ; GCN-NEXT: BB2_5: ; %.exit
80
+ ; GCN-NEXT: s_endpgm
81
; Scalar side - load one dword via the descriptor and compare it with 56.
+ %val = call i32 @llvm.amdgcn.s.buffer.load.i32 (<4 x i32 > %desc , i32 0 , i32 0 )
82
+ %cmp = icmp eq i32 %val , 56
83
; The set.inactive call sits between the compare above and the branch below,
; so the compare result is live across it.
+ %tmp = call i32 @llvm.amdgcn.set.inactive.i32 (i32 %in , i32 42 ) #0
84
+ br i1 %cmp , label %.zero , label %.one
85
+
86
; Taken when %val equals 56 - store the set.inactive result unchanged.
+ .zero:
87
+ store i32 %tmp , i32 addrspace (1 )* %out
88
+ br label %.exit
89
+
90
; Taken otherwise - store the set.inactive result plus one.
+ .one:
91
+ %tmp.1 = add i32 %tmp , 1
92
+ store i32 %tmp.1 , i32 addrspace (1 )* %out
93
+ br label %.exit
94
+
95
+ .exit:
96
+ ret void
97
+ }
98
+
43
99
; Intrinsic declarations for the kernels in this file.
; set.inactive.i32 is called by @set_inactive_scc; the i64 flavour is
; presumably used by @set_inactive_64 (its body is not shown here - verify).
declare i32 @llvm.amdgcn.set.inactive.i32 (i32 , i32 ) #0
44
100
declare i64 @llvm.amdgcn.set.inactive.i64 (i64 , i64 ) #0
101
; Scalar buffer load used by @set_inactive_scc to produce the value it
; compares with 56. It is declared without an attribute group.
+ declare i32 @llvm.amdgcn.s.buffer.load.i32 (<4 x i32 >, i32 , i32 )
45
102
46
103
; Attribute group #0 is attached to both set.inactive declarations above and
; to the set.inactive call site inside @set_inactive_scc.
attributes #0 = { convergent readnone }
0 commit comments