diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 9e7e1dbcea25d..44f62e213c9d3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -989,6 +989,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
     setOperationAction(ISD::FLOG2, {MVT::v2f16, MVT::v2bf16}, Expand);
   }
 
+  setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom);
+
   // No FPOW or FREM in PTX.
 
   // Now deduce the information based on the above mentioned
@@ -2652,6 +2654,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return SDValue();
   case ISD::FRAMEADDR:
     return SDValue();
+  case ISD::ADDRSPACECAST:
+    return LowerADDRSPACECAST(Op, DAG);
   case ISD::GlobalAddress:
     return LowerGlobalAddress(Op, DAG);
   case ISD::INTRINSIC_W_CHAIN:
@@ -2767,6 +2771,20 @@ unsigned NVPTXTargetLowering::getJumpTableEncoding() const {
   return MachineJumpTableInfo::EK_Inline;
 }
 
+// Custom lowering for ISD::ADDRSPACECAST. A cast in which neither the source
+// nor the destination is the generic address space is replaced with undef;
+// any cast that involves the generic address space is returned unchanged.
+SDValue NVPTXTargetLowering::LowerADDRSPACECAST(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
+  unsigned SrcAS = N->getSrcAddressSpace();
+  unsigned DestAS = N->getDestAddressSpace();
+  if (SrcAS != llvm::ADDRESS_SPACE_GENERIC &&
+      DestAS != llvm::ADDRESS_SPACE_GENERIC)
+    return DAG.getUNDEF(Op.getValueType());
+  return Op;
+}
+
 // This function is almost a copy of SelectionDAG::expandVAArg().
 // The only diff is that this one produces loads from local address space.
 SDValue NVPTXTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 5adf69d621552..74ec14ba5f8e3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -264,6 +264,7 @@ class NVPTXTargetLowering : public TargetLowering {
   const NVPTXSubtarget &STI; // cache the subtarget here
   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
 
+  SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
index 23428b3728674..0aa66d1fc45f3 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
@@ -1,15 +1,15 @@
-; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32
-; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64
-; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr| FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64
+; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32
+; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64
+; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64
 ; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 ; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}
 
 ; ALL-LABEL: conv1
 define i32 @conv1(ptr addrspace(1) %ptr) {
-; G32: cvta.global.u32
+; CLS32: cvta.global.u32
 ; ALL-NOT: cvt.u64.u32
-; G64: cvta.global.u64
+; CLS64: cvta.global.u64
 ; ALL: ld.u32
   %genptr = addrspacecast ptr addrspace(1) %ptr to ptr
   %val = load i32, ptr %genptr
@@ -99,6 +99,17 @@ define i32 @conv8(ptr %ptr) {
   ret i32 %val
 }
 
+; ALL-LABEL: conv9
+define i32 @conv9(ptr addrspace(1) %ptr) {
+; CLS32: // implicit-def: %[[ADDR:r[0-9]+]]
+; PTRCONV: // implicit-def: %[[ADDR:r[0-9]+]]
+; NOPTRCONV: // implicit-def: %[[ADDR:rd[0-9]+]]
+; ALL: ld.shared.u32 %r{{[0-9]+}}, [%[[ADDR]]]
+  %specptr = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(3)
+  %val = load i32, ptr addrspace(3) %specptr
+  ret i32 %val
+}
+
 ; Check that we support addrspacecast when splitting the vector
 ; result (<2 x ptr> => 2 x <1 x ptr>).
 ; This also checks that scalarization works for addrspacecast