Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2030,6 +2030,21 @@ bool TargetLowering::SimplifyDemandedBits(
Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
}

// If we are only demanding sign bits then we can use the shift source
// directly.
if (std::optional<uint64_t> MaxSA =
TLO.DAG.getValidMaximumShiftAmount(Op, DemandedElts, Depth + 1)) {
unsigned ShAmt = *MaxSA;
// All demanded bits must already lie within Op0's sign bits, and none of the
// shifted-in zero bits may be demanded.
if (DemandedBits.countl_zero() >= ShAmt) {
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
if (DemandedBits.countr_zero() >= (BitWidth - NumSignBits))
return TLO.CombineTo(Op, Op0);
}
}

// Try to match AVG patterns (after shift simplification).
if (SDValue AVG = combineShiftToAVG(Op, TLO, *this, DemandedBits,
DemandedElts, Depth + 1))
Expand Down
144 changes: 72 additions & 72 deletions llvm/test/CodeGen/NVPTX/load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -167,25 +167,25 @@ define void @generic_4xi8(ptr %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi8_param_0];
; CHECK-NEXT: ld.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr %a
Expand Down Expand Up @@ -511,25 +511,25 @@ define void @generic_volatile_4xi8(ptr %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi8_param_0];
; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.volatile.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr %a
Expand Down Expand Up @@ -1416,25 +1416,25 @@ define void @global_4xi8(ptr addrspace(1) %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi8_param_0];
; CHECK-NEXT: ld.global.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.global.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(1) %a
Expand Down Expand Up @@ -1741,25 +1741,25 @@ define void @global_volatile_4xi8(ptr addrspace(1) %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi8_param_0];
; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(1) %a
Expand Down Expand Up @@ -2788,25 +2788,25 @@ define void @shared_4xi8(ptr addrspace(3) %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi8_param_0];
; CHECK-NEXT: ld.shared.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.shared.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(3) %a
Expand Down Expand Up @@ -3113,25 +3113,25 @@ define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi8_param_0];
; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(3) %a
Expand Down Expand Up @@ -4018,25 +4018,25 @@ define void @local_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi8_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(5) %a
Expand Down Expand Up @@ -4343,25 +4343,25 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi8_param_0];
; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: bfe.u32 %r2, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
; CHECK-NEXT: bfe.u32 %r4, %r1, 8, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: bfi.b32 %r6, %r5, %r3, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 16, 8;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(5) %a
Expand Down
Loading
Loading