Skip to content

Commit c617ef5

Browse files
committed
Review changes
1 parent c3d205a commit c617ef5

File tree

3 files changed

+112
-110
lines changed

3 files changed

+112
-110
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ def S_BREV_B64 : SOP1_64 <"s_brev_b64",
265265

266266
let Defs = [SCC] in {
267267
def S_BCNT0_I32_B32 : SOP1_32 <"s_bcnt0_i32_b32",
268-
[(set i32:$sdst, (UniformBinFrag<sub> 32, (UniformUnaryFrag<ctpop> i32:$src0)))]
268+
[(set i32:$sdst, (UniformBinFrag<sub> 32, (ctpop i32:$src0)))]
269269
>;
270270
def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">;
271271
def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32",
@@ -1887,7 +1887,7 @@ def : GCNPat <
18871887
>;
18881888

18891889
def : GCNPat <
1890-
(i64 (UniformBinFrag<sub> 64, (UniformUnaryFrag<ctpop> i64:$src))),
1890+
(i64 (UniformBinFrag<sub> 64, (ctpop i64:$src))),
18911891
(i64 (REG_SEQUENCE SReg_64,
18921892
(i32 (COPY_TO_REGCLASS (S_BCNT0_I32_B64 $src), SReg_32)), sub0,
18931893
(S_MOV_B32 (i32 0)), sub1))
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
3+
4+
define amdgpu_ps void @bcnt032_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
5+
; CHECK-LABEL: bcnt032_not_for_vregs:
6+
; CHECK: ; %bb.0:
7+
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
8+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
9+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
10+
; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
11+
; CHECK-NEXT: s_waitcnt vmcnt(0)
12+
; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
13+
; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
14+
; CHECK-NEXT: ;;#ASMSTART
15+
; CHECK-NEXT: ; use v3
16+
; CHECK-NEXT: ;;#ASMEND
17+
; CHECK-NEXT: global_store_dword v[0:1], v2, off
18+
; CHECK-NEXT: s_endpgm
19+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
20+
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
21+
%val0 = load volatile i32, ptr addrspace(1) %gep
22+
%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
23+
%result2 = sub i32 32, %result
24+
call void asm "; use $0", "s"(i32 %result2)
25+
%cmp = icmp ne i32 %result2, 0
26+
%zext = zext i1 %cmp to i32
27+
store i32 %result, ptr addrspace(1) %out
28+
ret void
29+
}
30+
31+
define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
32+
; CHECK-LABEL: bcnt064_not_for_vregs:
33+
; CHECK: ; %bb.0:
34+
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
35+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
36+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
37+
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
38+
; CHECK-NEXT: s_waitcnt vmcnt(0)
39+
; CHECK-NEXT: v_mov_b32_e32 v4, 0
40+
; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
41+
; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
42+
; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
43+
; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
44+
; CHECK-NEXT: ;;#ASMSTART
45+
; CHECK-NEXT: ; use v[5:6]
46+
; CHECK-NEXT: ;;#ASMEND
47+
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
48+
; CHECK-NEXT: s_endpgm
49+
%tid = call i32 @llvm.amdgcn.workitem.id.x()
50+
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
51+
%val0 = load volatile i64, ptr addrspace(1) %gep
52+
%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
53+
%result2 = sub i64 64, %result
54+
call void asm "; use $0", "s"(i64 %result2)
55+
%cmp = icmp ne i64 %result2, 0
56+
%zext = zext i1 %cmp to i32
57+
store i64 %result, ptr addrspace(1) %out
58+
ret void
59+
}
60+
61+
define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
62+
; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
63+
; CHECK: ; %bb.0:
64+
; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
65+
; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
66+
; CHECK-NEXT: ;;#ASMSTART
67+
; CHECK-NEXT: ; use s1
68+
; CHECK-NEXT: ;;#ASMEND
69+
; CHECK-NEXT: ;;#ASMSTART
70+
; CHECK-NEXT: ; use s0
71+
; CHECK-NEXT: ;;#ASMEND
72+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
73+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
74+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
75+
; CHECK-NEXT: ; return to shader part epilog
76+
%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
77+
%result2 = sub i32 32, %result
78+
call void asm "; use $0", "s"(i32 %result)
79+
call void asm "; use $0", "s"(i32 %result2)
80+
%cmp = icmp ne i32 %result2, 0
81+
%zext = zext i1 %cmp to i32
82+
ret i32 %zext
83+
}
84+
85+
define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
86+
; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
87+
; CHECK: ; %bb.0:
88+
; CHECK-NEXT: s_mov_b32 s3, 0
89+
; CHECK-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
90+
; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
91+
; CHECK-NEXT: s_mov_b32 s1, s3
92+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
93+
; CHECK-NEXT: ;;#ASMSTART
94+
; CHECK-NEXT: ; use s[0:1]
95+
; CHECK-NEXT: ;;#ASMEND
96+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
97+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
98+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
99+
; CHECK-NEXT: ;;#ASMSTART
100+
; CHECK-NEXT: ; use s[2:3]
101+
; CHECK-NEXT: ;;#ASMEND
102+
; CHECK-NEXT: ; return to shader part epilog
103+
%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
104+
%result2 = sub i64 64, %result
105+
call void asm "; use $0", "s"(i64 %result)
106+
call void asm "; use $0", "s"(i64 %result2)
107+
%cmp = icmp ne i64 %result2, 0
108+
%zext = zext i1 %cmp to i32
109+
ret i32 %zext
110+
}

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 0 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -621,112 +621,4 @@ if:
621621

622622
endif:
623623
ret i32 1
624-
}
625-
626-
define amdgpu_ps void @bcnt032_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
627-
; CHECK-LABEL: bcnt032_not_for_vregs:
628-
; CHECK: ; %bb.0:
629-
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
630-
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
631-
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
632-
; CHECK-NEXT: global_load_dword v2, v[2:3], off glc
633-
; CHECK-NEXT: s_waitcnt vmcnt(0)
634-
; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
635-
; CHECK-NEXT: v_sub_u32_e32 v3, 32, v2
636-
; CHECK-NEXT: ;;#ASMSTART
637-
; CHECK-NEXT: ; use v3
638-
; CHECK-NEXT: ;;#ASMEND
639-
; CHECK-NEXT: global_store_dword v[0:1], v2, off
640-
; CHECK-NEXT: s_endpgm
641-
%tid = call i32 @llvm.amdgcn.workitem.id.x()
642-
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
643-
%val0 = load volatile i32, ptr addrspace(1) %gep
644-
%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
645-
%result2 = sub i32 32, %result
646-
call void asm "; use $0", "s"(i32 %result2)
647-
%cmp = icmp ne i32 %result2, 0
648-
%zext = zext i1 %cmp to i32
649-
store i32 %result, ptr addrspace(1) %out
650-
ret void
651-
}
652-
653-
define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
654-
; CHECK-LABEL: bcnt064_not_for_vregs:
655-
; CHECK: ; %bb.0:
656-
; CHECK-NEXT: s_lshl_b32 s0, s0, 2
657-
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s0, v2
658-
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
659-
; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off glc
660-
; CHECK-NEXT: s_waitcnt vmcnt(0)
661-
; CHECK-NEXT: v_mov_b32_e32 v4, 0
662-
; CHECK-NEXT: v_bcnt_u32_b32 v2, v2, 0
663-
; CHECK-NEXT: v_bcnt_u32_b32 v3, v3, v2
664-
; CHECK-NEXT: v_sub_co_u32_e32 v5, vcc, 64, v3
665-
; CHECK-NEXT: v_subb_co_u32_e64 v6, s[0:1], 0, 0, vcc
666-
; CHECK-NEXT: ;;#ASMSTART
667-
; CHECK-NEXT: ; use v[5:6]
668-
; CHECK-NEXT: ;;#ASMEND
669-
; CHECK-NEXT: global_store_dwordx2 v[0:1], v[3:4], off
670-
; CHECK-NEXT: s_endpgm
671-
%tid = call i32 @llvm.amdgcn.workitem.id.x()
672-
%gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
673-
%val0 = load volatile i64, ptr addrspace(1) %gep
674-
%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
675-
%result2 = sub i64 64, %result
676-
call void asm "; use $0", "s"(i64 %result2)
677-
%cmp = icmp ne i64 %result2, 0
678-
%zext = zext i1 %cmp to i32
679-
store i64 %result, ptr addrspace(1) %out
680-
ret void
681-
}
682-
683-
define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
684-
; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
685-
; CHECK: ; %bb.0:
686-
; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
687-
; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
688-
; CHECK-NEXT: ;;#ASMSTART
689-
; CHECK-NEXT: ; use s1
690-
; CHECK-NEXT: ;;#ASMEND
691-
; CHECK-NEXT: ;;#ASMSTART
692-
; CHECK-NEXT: ; use s0
693-
; CHECK-NEXT: ;;#ASMEND
694-
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
695-
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
696-
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
697-
; CHECK-NEXT: ; return to shader part epilog
698-
%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
699-
%result2 = sub i32 32, %result
700-
call void asm "; use $0", "s"(i32 %result)
701-
call void asm "; use $0", "s"(i32 %result2)
702-
%cmp = icmp ne i32 %result2, 0
703-
%zext = zext i1 %cmp to i32
704-
ret i32 %zext
705-
}
706-
707-
define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
708-
; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
709-
; CHECK: ; %bb.0:
710-
; CHECK-NEXT: s_mov_b32 s3, 0
711-
; CHECK-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
712-
; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
713-
; CHECK-NEXT: s_mov_b32 s1, s3
714-
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
715-
; CHECK-NEXT: ;;#ASMSTART
716-
; CHECK-NEXT: ; use s[0:1]
717-
; CHECK-NEXT: ;;#ASMEND
718-
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
719-
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
720-
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
721-
; CHECK-NEXT: ;;#ASMSTART
722-
; CHECK-NEXT: ; use s[2:3]
723-
; CHECK-NEXT: ;;#ASMEND
724-
; CHECK-NEXT: ; return to shader part epilog
725-
%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
726-
%result2 = sub i64 64, %result
727-
call void asm "; use $0", "s"(i64 %result)
728-
call void asm "; use $0", "s"(i64 %result2)
729-
%cmp = icmp ne i64 %result2, 0
730-
%zext = zext i1 %cmp to i32
731-
ret i32 %zext
732624
}

0 commit comments

Comments
 (0)