Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,15 +550,19 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
uint32_t NewImm = 0;

if (Opc == AMDGPU::S_AND_B32) {
if (isPowerOf2_32(~Imm)) {
MachineOperand *SccDef =
MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
if (isPowerOf2_32(~Imm) && SccDef->isDead()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (isPowerOf2_32(~Imm) && SccDef->isDead()) {
if (SccDef->isDead() && isPowerOf2_32(~Imm)) {

NewImm = llvm::countr_one(Imm);
Opc = AMDGPU::S_BITSET0_B32;
} else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
NewImm = ~Imm;
Opc = AMDGPU::S_ANDN2_B32;
}
} else if (Opc == AMDGPU::S_OR_B32) {
if (isPowerOf2_32(Imm)) {
MachineOperand *SccDef =
MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr);
if (isPowerOf2_32(Imm) && SccDef->isDead()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (isPowerOf2_32(Imm) && SccDef->isDead()) {
if (SccDef->isDead() && isPowerOf2_32(Imm)) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, short-circuit the call to findRegisterDefOperand in the pretty common case that Imm is not a power of two?

Suggested change
if (isPowerOf2_32(Imm) && SccDef->isDead()) {
if (isPowerOf2_32(Imm) && MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)->isDead()) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Short-circuited findRegisterDefOperand calls.

NewImm = llvm::countr_zero(Imm);
Opc = AMDGPU::S_BITSET1_B32;
} else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
Expand Down
47 changes: 42 additions & 5 deletions llvm/test/CodeGen/AMDGPU/s_cmp_0.ll
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,43 @@ define amdgpu_ps i32 @and64(i64 inreg %val0, i64 inreg %val1) {
ret i32 %zext
}

; 32-bit AND that clears only bit 31 (mask 0x7fffffff), where the result is
; both consumed by inline asm and compared against zero.
; The expected output keeps the s_and_b32 form and feeds s_cselect_b64 with no
; separate compare in between, i.e. the AND is not rewritten to s_bitset0_b32.
; NOTE(review): presumably this is the case where the SCC def of the AND is
; still live, so the shrink to S_BITSET0_B32 must be skipped - confirm against
; SIShrinkInstructions::shrinkScalarLogicOp.
define amdgpu_ps i32 @and32_clear_one_bit(i32 inreg %val0) {
; CHECK-LABEL: and32_clear_one_bit:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_and_b32 s0, s0, 0x7fffffff
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s0
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: ; return to shader part epilog
; 2147483647 == 0x7fffffff: every bit set except bit 31.
%result = and i32 %val0, 2147483647
call void asm "; use $0", "s"(i32 %result)
%cmp = icmp ne i32 %result, 0
%zext = zext i1 %cmp to i32
ret i32 %zext
}

; 64-bit AND that clears only bit 31 (mask 0xffffffff7fffffff), where the
; result is both consumed by inline asm and compared against zero.
; The expected output uses s_bitset0_b32 on the low half (s0) and then an
; explicit s_cmp_lg_u64 on the full s[0:1] pair before s_cselect_b64.
; NOTE(review): presumably the shrink is allowed here because the zero test is
; done by the separate 64-bit compare rather than by the AND's own SCC result -
; confirm against SIShrinkInstructions::shrinkScalarLogicOp.
define amdgpu_ps i32 @and64_clear_one_bit(i64 inreg %val0) {
; CHECK-LABEL: and64_clear_one_bit:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_bitset0_b32 s0, 31
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; use s[0:1]
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; CHECK-NEXT: v_readfirstlane_b32 s0, v0
; CHECK-NEXT: ; return to shader part epilog
; -2147483649 == 0xffffffff7fffffff: every bit set except bit 31.
%result = and i64 %val0, -2147483649
call void asm "; use $0", "s"(i64 %result)
%cmp = icmp ne i64 %result, 0
%zext = zext i1 %cmp to i32
ret i32 %zext
}

define amdgpu_ps i32 @or32(i32 inreg %val0, i32 inreg %val1) {
; CHECK-LABEL: or32:
; CHECK: ; %bb.0:
Expand Down Expand Up @@ -623,14 +660,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
; CHECK-NEXT: s_cbranch_scc0 .LBB36_2
; CHECK-NEXT: s_cbranch_scc0 .LBB38_2
; CHECK-NEXT: ; %bb.1: ; %endif
; CHECK-NEXT: s_mov_b32 s0, 1
; CHECK-NEXT: s_branch .LBB36_3
; CHECK-NEXT: .LBB36_2: ; %if
; CHECK-NEXT: s_branch .LBB38_3
; CHECK-NEXT: .LBB38_2: ; %if
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_branch .LBB36_3
; CHECK-NEXT: .LBB36_3:
; CHECK-NEXT: s_branch .LBB38_3
; CHECK-NEXT: .LBB38_3:
%cmp = icmp ne ptr addrspace(4) @1, null
br i1 %cmp, label %endif, label %if

Expand Down
Loading