diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c51729e224bf5..75a2bb157c99b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -14,6 +14,7 @@ #include "NVPTXUtilities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsNVPTX.h" @@ -2449,6 +2450,11 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) { return true; } +static inline bool isAddLike(const SDValue V) { + return V.getOpcode() == ISD::ADD || + (V->getOpcode() == ISD::OR && V->getFlags().hasDisjoint()); +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { @@ -2475,7 +2481,7 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { // symbol+offset bool NVPTXDAGToDAGISel::SelectADDRsi_imp( SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { - if (Addr.getOpcode() == ISD::ADD) { + if (isAddLike(Addr)) { if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { @@ -2512,7 +2518,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp( Addr.getOpcode() == ISD::TargetGlobalAddress) return false; // direct calls. - if (Addr.getOpcode() == ISD::ADD) { + if (isAddLike(Addr)) { if (SelectDirectAddr(Addr.getOperand(0), Addr)) { return false; } diff --git a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll new file mode 100644 index 0000000000000..1b1bb91d5c79e --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %} +target triple = "nvptx64-nvidia-cuda" + +@a = external global ptr align 16 + +define i32 @test_disjoint_or_addr(i16 %a) { +; CHECK-LABEL: test_disjoint_or_addr( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: mov.u64 %rd1, a; +; CHECK-NEXT: cvta.global.u64 %rd2, %rd1; +; CHECK-NEXT: ld.u32 %r1, [%rd2+8]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %a1 = ptrtoint ptr @a to i64 + %a2 = or disjoint i64 %a1, 8 + %a3 = inttoptr i64 %a2 to ptr + %v = load i32, ptr %a3 + ret i32 %v +} diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 9cfe9192772b8..69d51fd1fcc1b 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -29,7 +29,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly ; PTX-NEXT: .reg .pred %p<2>; ; PTX-NEXT: .reg .b16 %rs<3>; ; PTX-NEXT: .reg .b32 %r<11>; -; PTX-NEXT: .reg .b64 %rd<10>; +; PTX-NEXT: .reg .b64 %rd<9>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: // %entry ; PTX-NEXT: mov.u64 %SPL, __local_depot0; @@ -38,23 +38,22 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly ; PTX-NEXT: and.b16 %rs2, %rs1, 1; ; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1; ; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2]; -; PTX-NEXT: add.u64 %rd2, %SP, 0; -; PTX-NEXT: or.b64 %rd3, %rd2, 8; -; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8]; -; PTX-NEXT: st.u64 [%rd3], %rd4; -; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0]; -; PTX-NEXT: st.u64 [%SP], %rd5; -; PTX-NEXT: mov.u64 %rd6, gi; -; PTX-NEXT: cvta.global.u64 %rd7, %rd6; -; PTX-NEXT: selp.b64 %rd8, %rd2, %rd7, %p1; -; PTX-NEXT: add.s64 %rd9, %rd8, %rd1; -; PTX-NEXT: ld.u8 %r1, [%rd9]; -; PTX-NEXT: ld.u8 %r2, [%rd9+1]; +; PTX-NEXT: ld.param.u64 %rd2, [non_kernel_function_param_0+8]; +; PTX-NEXT: st.u64 [%SP+8], %rd2; +; PTX-NEXT: ld.param.u64 %rd3, [non_kernel_function_param_0]; +; PTX-NEXT: st.u64 [%SP], %rd3; +; PTX-NEXT: mov.u64 %rd4, gi; +; PTX-NEXT: cvta.global.u64 %rd5, %rd4; +; PTX-NEXT: add.u64 %rd6, %SP, 0; +; PTX-NEXT: selp.b64 %rd7, %rd6, %rd5, %p1; +; PTX-NEXT: add.s64 %rd8, %rd7, %rd1; +; PTX-NEXT: ld.u8 %r1, [%rd8]; +; PTX-NEXT: ld.u8 %r2, [%rd8+1]; ; PTX-NEXT: shl.b32 %r3, %r2, 8; ; PTX-NEXT: or.b32 %r4, %r3, %r1; -; PTX-NEXT: ld.u8 %r5, [%rd9+2]; +; PTX-NEXT: ld.u8 %r5, [%rd8+2]; ; PTX-NEXT: shl.b32 %r6, %r5, 16; -; PTX-NEXT: ld.u8 %r7, [%rd9+3]; +; PTX-NEXT: ld.u8 %r7, [%rd8+3]; ; PTX-NEXT: shl.b32 %r8, %r7, 24; ; PTX-NEXT: or.b32 %r9, %r8, %r6; ; PTX-NEXT: or.b32 %r10, %r9, %r4; diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll index cb54812dea6d9..f7ed690efabcf 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll @@ -153,7 +153,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) { ; CHECK-PTX-NEXT: .reg .b64 %SPL; ; CHECK-PTX-NEXT: .reg .b16 %rs<6>; ; CHECK-PTX-NEXT: .reg .b32 %r<7>; -; CHECK-PTX-NEXT: .reg .b64 %rd<11>; +; CHECK-PTX-NEXT: .reg .b64 %rd<7>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry ; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2; @@ -163,24 +163,20 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) { ; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7; ; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8; ; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3]; -; CHECK-PTX-NEXT: or.b64 %rd4, %rd3, 4; -; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4]; -; CHECK-PTX-NEXT: or.b64 %rd5, %rd3, 5; -; CHECK-PTX-NEXT: or.b64 %rd6, %rd3, 7; -; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd6]; +; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4]; +; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd3+7]; ; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1; -; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5]; -; CHECK-PTX-NEXT: or.b64 %rd7, %rd3, 6; -; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd7]; +; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd3+5]; +; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd3+6]; ; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8; ; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2; ; CHECK-PTX-NEXT: st.u16 [%SP], %rs5; -; CHECK-PTX-NEXT: ld.u64 %rd8, [%rd3+8]; +; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3+8]; ; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2; ; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3; -; CHECK-PTX-NEXT: cvt.u64.u32 %rd9, %r5; -; CHECK-PTX-NEXT: add.s64 %rd10, %rd9, %rd8; -; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd10; +; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5; +; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4; +; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6; ; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-PTX-NEXT: ret; entry: @@ -219,7 +215,7 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: .reg .b64 %SPL; ; CHECK-PTX-NEXT: .reg .b16 %rs<10>; ; CHECK-PTX-NEXT: .reg .b32 %r<4>; -; CHECK-PTX-NEXT: .reg .b64 %rd<8>; +; CHECK-PTX-NEXT: .reg .b64 %rd<7>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry ; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3; @@ -240,17 +236,16 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: st.u16 [%SP], %rs8; ; CHECK-PTX-NEXT: mov.b32 %r1, 1; ; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1; -; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 8; -; CHECK-PTX-NEXT: or.b64 %rd6, %rd5, 4; ; CHECK-PTX-NEXT: mov.b16 %rs9, 1; -; CHECK-PTX-NEXT: st.u8 [%rd6], %rs9; -; CHECK-PTX-NEXT: mov.b64 %rd7, 1; -; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7; +; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9; +; CHECK-PTX-NEXT: mov.b64 %rd5, 1; +; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5; +; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8; ; CHECK-PTX-NEXT: { // callseq 1, 0 ; CHECK-PTX-NEXT: .param .b32 param0; ; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6; ; CHECK-PTX-NEXT: .param .b32 retval0; ; CHECK-PTX-NEXT: call.uni (retval0), ; CHECK-PTX-NEXT: variadics2,