diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0461ed4712221..4ce8c508c5f2b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2039,6 +2039,13 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) { Val = AndLHS; Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32); Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32); + + // If pre-shift AND includes the sign bit in the bitfield, we must use + // signed BFE to replicate that bit during bitfield extraction. If the + // sign bit is not part of the mask, unsigned BFE will zero out upper bits + // of the result + if (N->getOpcode() == ISD::SRA) + IsSigned = (ShiftAmt + NumBits) == Val.getValueSizeInBits(); } else if (LHS->getOpcode() == ISD::SHL) { // Here, we have a pattern like: // diff --git a/llvm/test/CodeGen/NVPTX/bfe.ll b/llvm/test/CodeGen/NVPTX/bfe.ll index 52e35691f9023..0392f7786731a 100644 --- a/llvm/test/CodeGen/NVPTX/bfe.ll +++ b/llvm/test/CodeGen/NVPTX/bfe.ll @@ -1,67 +1,221 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | %ptxas-verify %} +target triple = "nvptx64-nvidia-cuda" -; CHECK: bfe0 define i32 @bfe0(i32 %a) { -; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 4, 4 -; CHECK-NOT: shr -; CHECK-NOT: and +; CHECK-LABEL: bfe0( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [bfe0_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 4, 4; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; %val0 = ashr i32 %a, 4 %val1 = and i32 %val0, 15 ret i32 %val1 } -; CHECK: bfe1 define i32 @bfe1(i32 %a) { -; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 3, 3 -; CHECK-NOT: shr -; CHECK-NOT: and +; CHECK-LABEL: bfe1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [bfe1_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 3, 3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; %val0 = ashr i32 %a, 3 %val1 = and i32 %val0, 7 ret i32 %val1 } -; CHECK: bfe2 define i32 @bfe2(i32 %a) { -; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 5, 3 -; CHECK-NOT: shr -; CHECK-NOT: and +; CHECK-LABEL: bfe2( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [bfe2_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; %val0 = ashr i32 %a, 5 %val1 = and i32 %val0, 7 ret i32 %val1 } -; CHECK-LABEL: no_bfe_on_32bit_overflow define i32 @no_bfe_on_32bit_overflow(i32 %a) { -; CHECK-NOT: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 31, 4 +; CHECK-LABEL: no_bfe_on_32bit_overflow( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_param_0]; +; CHECK-NEXT: shr.s32 %r2, %r1, 31; +; CHECK-NEXT: and.b32 %r3, %r2, 15; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %val0 = ashr i32 %a, 31 %val1 = and i32 %val0, 15 ret i32 %val1 } -; CHECK-LABEL: no_bfe_on_32bit_overflow_shr_and_pair define i32 @no_bfe_on_32bit_overflow_shr_and_pair(i32 %a) { -; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, 31 -; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 15 +; CHECK-LABEL: no_bfe_on_32bit_overflow_shr_and_pair( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_shr_and_pair_param_0]; +; CHECK-NEXT: shr.s32 %r2, %r1, 31; +; CHECK-NEXT: and.b32 %r3, %r2, 15; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %val0 = ashr i32 %a, 31 %val1 = and i32 %val0, 15 ret i32 %val1 } -; CHECK-LABEL: no_bfe_on_64bit_overflow define i64 @no_bfe_on_64bit_overflow(i64 %a) { -; CHECK-NOT: bfe.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, 63, 3 +; CHECK-LABEL: no_bfe_on_64bit_overflow( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_param_0]; +; CHECK-NEXT: shr.s64 %rd2, %rd1, 63; +; CHECK-NEXT: and.b64 %rd3, %rd2, 7; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %val0 = ashr i64 %a, 63 %val1 = and i64 %val0, 7 ret i64 %val1 } -; CHECK-LABEL: no_bfe_on_64bit_overflow_shr_and_pair define i64 @no_bfe_on_64bit_overflow_shr_and_pair(i64 %a) { -; CHECK: shr.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, 63 -; CHECK: and.b64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, 7 +; CHECK-LABEL: no_bfe_on_64bit_overflow_shr_and_pair( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_shr_and_pair_param_0]; +; CHECK-NEXT: shr.s64 %rd2, %rd1, 63; +; CHECK-NEXT: and.b64 %rd3, %rd2, 7; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %val0 = ashr i64 %a, 63 %val1 = and i64 %val0, 7 ret i64 %val1 } + +define i32 @bfe_ashr_signed_32(i32 %x) { +; CHECK-O3-LABEL: bfe_ashr_signed_32( +; CHECK-O3: { +; CHECK-O3-NEXT: .reg .b32 %r<3>; +; CHECK-O3-EMPTY: +; CHECK-O3-NEXT: // %bb.0: +; CHECK-O3-NEXT: ld.param.u16 %r1, [bfe_ashr_signed_32_param_0+2]; +; CHECK-O3-NEXT: bfe.s32 %r2, %r1, 4, 12; +; CHECK-O3-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-O3-NEXT: ret; +; +; CHECK-O0-LABEL: bfe_ashr_signed_32( +; CHECK-O0: { +; CHECK-O0-NEXT: .reg .b32 %r<3>; +; CHECK-O0-EMPTY: +; CHECK-O0-NEXT: // %bb.0: +; CHECK-O0-NEXT: ld.param.u32 %r1, [bfe_ashr_signed_32_param_0]; +; CHECK-O0-NEXT: bfe.s32 %r2, %r1, 20, 12; +; CHECK-O0-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-O0-NEXT: ret; + %and = and i32 %x, -65536 + %shr = ashr exact i32 %and, 20 + ret i32 %shr +} + +define i32 @bfe_ashr_unsigned_32(i32 %x) { +; CHECK-LABEL: bfe_ashr_unsigned_32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [bfe_ashr_unsigned_32_param_0]; +; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 6; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %and = and i32 %x, 2047 + %shr = ashr exact i32 %and, 5 + ret i32 %shr +} + +define i64 @bfe_ashr_signed_64(i64 %x) { +; CHECK-LABEL: bfe_ashr_signed_64( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_signed_64_param_0]; +; CHECK-NEXT: bfe.s64 %rd2, %rd1, 16, 48; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; + %and = and i64 %x, -65536 + %shr = ashr exact i64 %and, 16 + ret i64 %shr +} + +define i64 @bfe_ashr_unsigned_64(i64 %x) { +; CHECK-LABEL: bfe_ashr_unsigned_64( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_unsigned_64_param_0]; +; CHECK-NEXT: bfe.u64 %rd2, %rd1, 5, 6; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; + %and = and i64 %x, 2047 + %shr = ashr exact i64 %and, 5 + ret i64 %shr +} + +define i32 @bfe3(i128 %a) { +; CHECK-LABEL: bfe3( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe3_param_0]; +; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; +; CHECK-NEXT: bfe.s32 %r2, %r1, 15, 17; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %trunc = trunc i128 %a to i32 + %and = and i32 %trunc, -32768 + %shr = ashr exact i32 %and, 15 + ret i32 %shr +} + +define i64 @bfe4(i128 %a) { +; CHECK-LABEL: bfe4( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe4_param_0]; +; CHECK-NEXT: bfe.s64 %rd3, %rd1, 17, 47; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; + %trunc = trunc i128 %a to i64 + %and = and i64 %trunc, -131072 + %shr = ashr exact i64 %and, 17 + ret i64 %shr +} +