Skip to content

Commit c3d205a

Browse files
committed
Use S-expressions instead
1 parent 9dd73e6 commit c3d205a

File tree

2 files changed

+37
-32
lines changed

2 files changed

+37
-32
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,9 @@ def S_BREV_B64 : SOP1_64 <"s_brev_b64",
265265

266266
let Defs = [SCC] in {
267267
def S_BCNT0_I32_B32 : SOP1_32 <"s_bcnt0_i32_b32",
268-
[(set i32:$sdst, (UniformUnaryFrag<int_amdgcn_bcnt32_lo> i32:$src0))]
269-
>;
270-
def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64",
271-
[(set i32:$sdst, (UniformUnaryFrag<int_amdgcn_bcnt64_lo> i64:$src0))]
268+
[(set i32:$sdst, (UniformBinFrag<sub> 32, (UniformUnaryFrag<ctpop> i32:$src0)))]
272269
>;
270+
def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">;
273271
def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32",
274272
[(set i32:$sdst, (UniformUnaryFrag<ctpop> i32:$src0))]
275273
>;
@@ -1888,6 +1886,13 @@ def : GCNPat <
18881886
(S_MOV_B32 (i32 0)), sub1))
18891887
>;
18901888

1889+
def : GCNPat <
1890+
(i64 (UniformBinFrag<sub> 64, (UniformUnaryFrag<ctpop> i64:$src))),
1891+
(i64 (REG_SEQUENCE SReg_64,
1892+
(i32 (COPY_TO_REGCLASS (S_BCNT0_I32_B64 $src), SReg_32)), sub0,
1893+
(S_MOV_B32 (i32 0)), sub1))
1894+
>;
1895+
18911896
def : GCNPat <
18921897
(i32 (UniformBinFrag<smax> i32:$x, (i32 (ineg i32:$x)))),
18931898
(S_ABS_I32 SReg_32:$x)

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ define amdgpu_ps i32 @bcnt064(i64 inreg %val0) {
465465
; CHECK: ; %bb.0:
466466
; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
467467
; CHECK-NEXT: s_mov_b32 s1, 0
468+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
468469
; CHECK-NEXT: ;;#ASMSTART
469470
; CHECK-NEXT: ; use s[0:1]
470471
; CHECK-NEXT: ;;#ASMEND
@@ -682,19 +683,18 @@ define amdgpu_ps void @bcnt064_not_for_vregs(ptr addrspace(1) %out, ptr addrspac
682683
define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
683684
; CHECK-LABEL: bcnt032_ctpop_multiple_uses:
684685
; CHECK: ; %bb.0:
685-
; CHECK-NEXT: s_bcnt1_i32_b32 s0, s0
686-
; CHECK-NEXT: s_sub_i32 s1, 32, s0
687-
; CHECK-NEXT: s_cmp_lg_u32 s1, 0
688-
; CHECK-NEXT: ;;#ASMSTART
689-
; CHECK-NEXT: ; use s0
690-
; CHECK-NEXT: ;;#ASMEND
691-
; CHECK-NEXT: ;;#ASMSTART
692-
; CHECK-NEXT: ; use s1
693-
; CHECK-NEXT: ;;#ASMEND
694-
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
695-
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
696-
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
697-
; CHECK-NEXT: ; return to shader part epilog
686+
; CHECK-NEXT: s_bcnt1_i32_b32 s1, s0
687+
; CHECK-NEXT: s_bcnt0_i32_b32 s0, s0
688+
; CHECK-NEXT: ;;#ASMSTART
689+
; CHECK-NEXT: ; use s1
690+
; CHECK-NEXT: ;;#ASMEND
691+
; CHECK-NEXT: ;;#ASMSTART
692+
; CHECK-NEXT: ; use s0
693+
; CHECK-NEXT: ;;#ASMEND
694+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
695+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
696+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
697+
; CHECK-NEXT: ; return to shader part epilog
698698
%result = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
699699
%result2 = sub i32 32, %result
700700
call void asm "; use $0", "s"(i32 %result)
@@ -707,21 +707,21 @@ define amdgpu_ps i32 @bcnt032_ctpop_multiple_uses(i32 inreg %val0) {
707707
define amdgpu_ps i32 @bcnt064_ctpop_multiple_uses(i64 inreg %val0) {
708708
; CHECK-LABEL: bcnt064_ctpop_multiple_uses:
709709
; CHECK: ; %bb.0:
710-
; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
711-
; CHECK-NEXT: s_sub_u32 s2, 64, s0
712-
; CHECK-NEXT: s_subb_u32 s3, 0, 0
713-
; CHECK-NEXT: s_mov_b32 s1, 0
714-
; CHECK-NEXT: s_cmp_lg_u64 s[2:3], 0
715-
; CHECK-NEXT: ;;#ASMSTART
716-
; CHECK-NEXT: ; use s[0:1]
717-
; CHECK-NEXT: ;;#ASMEND
718-
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
719-
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
720-
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
721-
; CHECK-NEXT: ;;#ASMSTART
722-
; CHECK-NEXT: ; use s[2:3]
723-
; CHECK-NEXT: ;;#ASMEND
724-
; CHECK-NEXT: ; return to shader part epilog
710+
; CHECK-NEXT: s_mov_b32 s3, 0
711+
; CHECK-NEXT: s_bcnt1_i32_b64 s2, s[0:1]
712+
; CHECK-NEXT: s_bcnt0_i32_b64 s0, s[0:1]
713+
; CHECK-NEXT: s_mov_b32 s1, s3
714+
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
715+
; CHECK-NEXT: ;;#ASMSTART
716+
; CHECK-NEXT: ; use s[0:1]
717+
; CHECK-NEXT: ;;#ASMEND
718+
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
719+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
720+
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
721+
; CHECK-NEXT: ;;#ASMSTART
722+
; CHECK-NEXT: ; use s[2:3]
723+
; CHECK-NEXT: ;;#ASMEND
724+
; CHECK-NEXT: ; return to shader part epilog
725725
%result = call i64 @llvm.ctpop.i64(i64 %val0) nounwind readnone
726726
%result2 = sub i64 64, %result
727727
call void asm "; use $0", "s"(i64 %result)

0 commit comments

Comments
 (0)