Skip to content

Commit 6af0eee

Browse files
committed
s_quadmask* implicitly defines SCC
Signed-off-by: John Lu <[email protected]>
1 parent 91c35d6 commit 6af0eee

File tree

2 files changed

+59
-0
lines changed

2 files changed

+59
-0
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,10 +352,12 @@ def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">;
352352

353353
} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
354354

355+
let Defs = [SCC] in { // s_quadmask_b32/b64 implicitly define SCC
355356
def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32",
356357
[(set i32:$sdst, (int_amdgcn_s_quadmask i32:$src0))]>;
357358
def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64",
358359
[(set i64:$sdst, (int_amdgcn_s_quadmask i64:$src0))]>;
360+
} // End Defs = [SCC]
359361

360362
let Uses = [M0] in {
361363
def S_MOVRELS_B32 : SOP1_32R <"s_movrels_b32">;
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck %s
3+
;; Ensure that the AND/ICMP pair is not fused into a single AND, because s_quadmask_b32 and s_quadmask_b64 implicitly define SCC.
4+
5+
; 32-bit case: the s.quadmask call is placed between the AND and the ICMP that consumes it; the expected output keeps a separate s_cmp_eq_u32 after s_quadmask_b32.
define amdgpu_kernel void @quadmask_32(i32 %val0, i32 %val1, ptr addrspace(1) %ptr) {
6+
; CHECK-LABEL: quadmask_32:
7+
; CHECK: ; %bb.0:
8+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
9+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
10+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
11+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
12+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
13+
; CHECK-NEXT: s_and_b32 s0, s0, 1
14+
; CHECK-NEXT: s_quadmask_b32 s1, s1
15+
; CHECK-NEXT: s_cmp_eq_u32 s0, 0
16+
; CHECK-NEXT: v_mov_b32_e32 v3, s1
17+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
18+
; CHECK-NEXT: global_store_dword v2, v3, s[2:3]
19+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
20+
; CHECK-NEXT: global_store_dword v[0:1], v2, off
21+
; CHECK-NEXT: s_endpgm
22+
%and = and i32 %val0, 1 ; AND half of the AND/ICMP pair under test
23+
%result = call i32 @llvm.amdgcn.s.quadmask.i32(i32 %val1) nounwind readnone ; sits between the AND and the ICMP
24+
store i32 %result, ptr addrspace(1) %ptr
25+
%cmp = icmp eq i32 %and, 0 ; ICMP half; compares the AND result against zero
26+
%sel = select i1 %cmp, i32 1, i32 0
27+
store i32 %sel, ptr addrspace(1) null, align 4
28+
ret void
29+
}
30+
31+
; 64-bit case: the s.quadmask call is placed between the AND and the ICMP that consumes it; the expected output keeps a separate s_cmp_eq_u32 after s_quadmask_b64.
define amdgpu_kernel void @quadmask_64(i32 %val0, i64 %val1, ptr addrspace(1) %ptr) {
32+
; CHECK-LABEL: quadmask_64:
33+
; CHECK: ; %bb.0:
34+
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x0
35+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x8
36+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
37+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
38+
; CHECK-NEXT: s_and_b32 s4, s6, 1
39+
; CHECK-NEXT: s_quadmask_b64 s[0:1], s[0:1]
40+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
41+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
42+
; CHECK-NEXT: s_cmp_eq_u32 s4, 0
43+
; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
44+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
45+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
46+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
47+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
48+
; CHECK-NEXT: global_store_dword v[0:1], v2, off
49+
; CHECK-NEXT: s_endpgm
50+
%and = and i32 %val0, 1 ; AND half of the AND/ICMP pair under test
51+
%result = call i64 @llvm.amdgcn.s.quadmask.i64(i64 %val1) nounwind readnone ; sits between the AND and the ICMP
52+
store i64 %result, ptr addrspace(1) %ptr
53+
%cmp = icmp eq i32 %and, 0 ; ICMP half; compares the AND result against zero
54+
%sel = select i1 %cmp, i32 1, i32 0
55+
store i32 %sel, ptr addrspace(1) null, align 4
56+
ret void
57+
}

0 commit comments

Comments
 (0)