diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 5ee1bee49247c..5086633a1919b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -151,12 +151,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) { if (tryLoadParam(N)) return; break; - case NVPTXISD::StoreRetval: - case NVPTXISD::StoreRetvalV2: - case NVPTXISD::StoreRetvalV4: - if (tryStoreRetval(N)) - return; - break; case NVPTXISD::StoreParam: case NVPTXISD::StoreParamV2: case NVPTXISD::StoreParamV4: @@ -1504,84 +1498,6 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) { return true; } -bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) { - SDLoc DL(N); - SDValue Chain = N->getOperand(0); - SDValue Offset = N->getOperand(1); - unsigned OffsetVal = Offset->getAsZExtVal(); - MemSDNode *Mem = cast(N); - - // How many elements do we have? - unsigned NumElts = 1; - switch (N->getOpcode()) { - default: - return false; - case NVPTXISD::StoreRetval: - NumElts = 1; - break; - case NVPTXISD::StoreRetvalV2: - NumElts = 2; - break; - case NVPTXISD::StoreRetvalV4: - NumElts = 4; - break; - } - - // Build vector of operands - SmallVector Ops; - for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(N->getOperand(i + 2)); - Ops.append({CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32), Chain}); - - // Determine target opcode - // If we have an i1, use an 8-bit store. The lowering code in - // NVPTXISelLowering will have already emitted an upcast. - std::optional Opcode = 0; - switch (NumElts) { - default: - return false; - case 1: - Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, - NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16, - NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64); - if (Opcode == NVPTX::StoreRetvalI8) { - // Fine tune the opcode depending on the size of the operand. - // This helps to avoid creating redundant COPY instructions in - // InstrEmitter::AddRegisterOperand(). - switch (Ops[0].getSimpleValueType().SimpleTy) { - default: - break; - case MVT::i32: - Opcode = NVPTX::StoreRetvalI8TruncI32; - break; - case MVT::i64: - Opcode = NVPTX::StoreRetvalI8TruncI64; - break; - } - } - break; - case 2: - Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, - NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16, - NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64); - break; - case 4: - Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, - NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16, - NVPTX::StoreRetvalV4I32, {/* no v4i64 */}); - break; - } - if (!Opcode) - return false; - - SDNode *Ret = CurDAG->getMachineNode(*Opcode, DL, MVT::Other, Ops); - MachineMemOperand *MemRef = cast(N)->getMemOperand(); - CurDAG->setNodeMemRefs(cast(Ret), {MemRef}); - - ReplaceNode(N, Ret); - return true; -} - // Helpers for constructing opcode (ex: NVPTX::StoreParamV4F32_iiri) #define getOpcV2H(ty, opKind0, opKind1) \ NVPTX::StoreParamV2##ty##_##opKind0##opKind1 diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index a785e1513682e..0e4dec1adca67 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -79,7 +79,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { bool tryStore(SDNode *N); bool tryStoreVector(SDNode *N); bool tryLoadParam(SDNode *N); - bool tryStoreRetval(SDNode *N); bool tryStoreParam(SDNode *N); bool tryFence(SDNode *N); void SelectAddrSpaceCast(SDNode *N); diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index b924a1f5ac93c..d9192fbfceff1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -370,7 +370,7 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) { // v2i8 is promoted to v2i16 NumElts = 1; - EltVT = MVT::v2i16; + EltVT = MVT::v2i8; } for (unsigned j = 0; j != NumElts; ++j) { ValueVTs.push_back(EltVT); @@ -1065,9 +1065,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(NVPTXISD::StoreParamV2) MAKE_CASE(NVPTXISD::StoreParamV4) MAKE_CASE(NVPTXISD::MoveParam) - MAKE_CASE(NVPTXISD::StoreRetval) - MAKE_CASE(NVPTXISD::StoreRetvalV2) - MAKE_CASE(NVPTXISD::StoreRetvalV4) MAKE_CASE(NVPTXISD::UNPACK_VECTOR) MAKE_CASE(NVPTXISD::BUILD_VECTOR) MAKE_CASE(NVPTXISD::CallPrototype) @@ -1438,7 +1435,11 @@ static MachinePointerInfo refinePtrAS(SDValue &Ptr, SelectionDAG &DAG, } static ISD::NodeType getExtOpcode(const ISD::ArgFlagsTy &Flags) { - return Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + if (Flags.isSExt()) + return ISD::SIGN_EXTEND; + if (Flags.isZExt()) + return ISD::ZERO_EXTEND; + return ISD::ANY_EXTEND; } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -3373,10 +3374,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( } InVals.push_back(P); } else { - bool aggregateIsPacked = false; - if (StructType *STy = dyn_cast(Ty)) - aggregateIsPacked = STy->isPacked(); - SmallVector VTs; SmallVector Offsets; ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0); @@ -3389,9 +3386,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); unsigned I = 0; for (const unsigned NumElts : VectorInfo) { - const EVT EltVT = VTs[I]; // i1 is loaded/stored as i8 - const EVT LoadVT = EltVT == MVT::i1 ? MVT::i8 : EltVT; + const EVT LoadVT = VTs[I] == MVT::i1 ? MVT::i8 : VTs[I]; // If the element is a packed type (ex. v2f16, v4i8, etc) holding // multiple elements. const unsigned PackingAmt = @@ -3403,14 +3399,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( SDValue VecAddr = DAG.getObjectPtrOffset( dl, ArgSymbol, TypeSize::getFixed(Offsets[I])); - const MaybeAlign PartAlign = [&]() -> MaybeAlign { - if (aggregateIsPacked) - return Align(1); - if (NumElts != 1) - return std::nullopt; - Align PartAlign = DAG.getEVTAlign(EltVT); - return commonAlignment(PartAlign, Offsets[I]); - }(); + const MaybeAlign PartAlign = commonAlignment(ArgAlign, Offsets[I]); SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr, MachinePointerInfo(ADDRESS_SPACE_PARAM), PartAlign, @@ -3419,23 +3408,22 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (P.getNode()) P.getNode()->setIROrder(Arg.getArgNo() + 1); for (const unsigned J : llvm::seq(NumElts)) { - SDValue Elt = DAG.getNode(LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR - : ISD::EXTRACT_VECTOR_ELT, - dl, LoadVT, P, - DAG.getIntPtrConstant(J * PackingAmt, dl)); + SDValue Elt = DAG.getNode( + LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR + : ISD::EXTRACT_VECTOR_ELT, + dl, LoadVT, P, DAG.getVectorIdxConstant(J * PackingAmt, dl)); // Extend or truncate the element if necessary (e.g. an i8 is loaded // into an i16 register) - const EVT ExpactedVT = ArgIns[I + J].VT; - assert((Elt.getValueType().bitsEq(ExpactedVT) || - (ExpactedVT.isScalarInteger() && - Elt.getValueType().isScalarInteger())) && + const EVT ExpectedVT = ArgIns[I + J].VT; + assert((Elt.getValueType() == ExpectedVT || + (ExpectedVT.isInteger() && Elt.getValueType().isInteger())) && "Non-integer argument type size mismatch"); - if (ExpactedVT.bitsGT(Elt.getValueType())) - Elt = DAG.getNode(getExtOpcode(ArgIns[I + J].Flags), dl, ExpactedVT, + if (ExpectedVT.bitsGT(Elt.getValueType())) + Elt = DAG.getNode(getExtOpcode(ArgIns[I + J].Flags), dl, ExpectedVT, Elt); - else if (ExpactedVT.bitsLT(Elt.getValueType())) - Elt = DAG.getNode(ISD::TRUNCATE, dl, ExpactedVT, Elt); + else if (ExpectedVT.bitsLT(Elt.getValueType())) + Elt = DAG.getNode(ISD::TRUNCATE, dl, ExpectedVT, Elt); InVals.push_back(Elt); } I += NumElts; @@ -3449,33 +3437,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( return Chain; } -// Use byte-store when the param adress of the return value is unaligned. -// This may happen when the return value is a field of a packed structure. -static SDValue LowerUnalignedStoreRet(SelectionDAG &DAG, SDValue Chain, - uint64_t Offset, EVT ElementType, - SDValue RetVal, const SDLoc &dl) { - // Bit logic only works on integer types - if (adjustElementType(ElementType)) - RetVal = DAG.getNode(ISD::BITCAST, dl, ElementType, RetVal); - - // Store each byte - for (unsigned i = 0, n = ElementType.getSizeInBits() / 8; i < n; i++) { - // Shift the byte to the last byte position - SDValue ShiftVal = DAG.getNode(ISD::SRL, dl, ElementType, RetVal, - DAG.getConstant(i * 8, dl, MVT::i32)); - SDValue StoreOperands[] = {Chain, DAG.getConstant(Offset + i, dl, MVT::i32), - ShiftVal}; - // Trunc store only the last byte by using - // st.param.b8 - // The register type can be larger than b8. - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, - DAG.getVTList(MVT::Other), StoreOperands, - MVT::i8, MachinePointerInfo(), std::nullopt, - MachineMemOperand::MOStore); - } - return Chain; -} - SDValue NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -3497,10 +3458,6 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); - for (const unsigned I : llvm::seq(VTs.size())) - if (const auto PromotedVT = PromoteScalarIntegerPTX(VTs[I])) - VTs[I] = *PromotedVT; - // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether // they are signed or unsigned types. @@ -3512,12 +3469,20 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(!PromoteScalarIntegerPTX(RetVal.getValueType()) && "OutVal type should always be legal"); - if (ExtendIntegerRetVal) { - RetVal = DAG.getNode(getExtOpcode(Outs[I].Flags), dl, MVT::i32, RetVal); - } else if (RetVal.getValueSizeInBits() < 16) { - // Use 16-bit registers for small load-stores as it's the - // smallest general purpose register size supported by NVPTX. - RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal); + EVT VTI = VTs[I]; + if (const auto PromotedVT = PromoteScalarIntegerPTX(VTI)) + VTI = *PromotedVT; + + const EVT StoreVT = + ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI); + + assert((RetVal.getValueType() == StoreVT || + (StoreVT.isInteger() && RetVal.getValueType().isInteger())) && + "Non-integer argument type size mismatch"); + if (StoreVT.bitsGT(RetVal.getValueType())) { + RetVal = DAG.getNode(getExtOpcode(Outs[I].Flags), dl, StoreVT, RetVal); + } else if (StoreVT.bitsLT(RetVal.getValueType())) { + RetVal = DAG.getNode(ISD::TRUNCATE, dl, StoreVT, RetVal); } return RetVal; }; @@ -3526,45 +3491,34 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); unsigned I = 0; for (const unsigned NumElts : VectorInfo) { - const Align CurrentAlign = commonAlignment(RetAlign, Offsets[I]); - if (NumElts == 1 && RetTy->isAggregateType() && - CurrentAlign < DAG.getEVTAlign(VTs[I])) { - Chain = LowerUnalignedStoreRet(DAG, Chain, Offsets[I], VTs[I], - GetRetVal(I), dl); - - // The call to LowerUnalignedStoreRet inserted the necessary SDAG nodes - // into the graph, so just move on to the next element. - I++; - continue; - } + const MaybeAlign CurrentAlign = ExtendIntegerRetVal + ? MaybeAlign(std::nullopt) + : commonAlignment(RetAlign, Offsets[I]); - SmallVector StoreOperands{ - Chain, DAG.getConstant(Offsets[I], dl, MVT::i32)}; - - for (const unsigned J : llvm::seq(NumElts)) - StoreOperands.push_back(GetRetVal(I + J)); + SDValue Val; + if (NumElts == 1) { + Val = GetRetVal(I); + } else { + SmallVector StoreVals; + for (const unsigned J : llvm::seq(NumElts)) { + SDValue ValJ = GetRetVal(I + J); + if (ValJ.getValueType().isVector()) + DAG.ExtractVectorElements(ValJ, StoreVals); + else + StoreVals.push_back(ValJ); + } - NVPTXISD::NodeType Op; - switch (NumElts) { - case 1: - Op = NVPTXISD::StoreRetval; - break; - case 2: - Op = NVPTXISD::StoreRetvalV2; - break; - case 4: - Op = NVPTXISD::StoreRetvalV4; - break; - default: - llvm_unreachable("Invalid vector info."); + EVT VT = EVT::getVectorVT(F.getContext(), StoreVals[0].getValueType(), + StoreVals.size()); + Val = DAG.getBuildVector(VT, dl, StoreVals); } - // Adjust type of load/store op if we've extended the scalar - // return value. - EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[I]; - Chain = DAG.getMemIntrinsicNode( - Op, dl, DAG.getVTList(MVT::Other), StoreOperands, TheStoreType, - MachinePointerInfo(), CurrentAlign, MachineMemOperand::MOStore); + SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32); + SDValue Ptr = + DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I])); + + Chain = DAG.getStore(Chain, dl, Val, Ptr, + MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign); I += NumElts; } @@ -5120,19 +5074,12 @@ static SDValue combinePackingMovIntoStore(SDNode *N, case NVPTXISD::StoreParamV2: Opcode = NVPTXISD::StoreParamV4; break; - case NVPTXISD::StoreRetval: - Opcode = NVPTXISD::StoreRetvalV2; - break; - case NVPTXISD::StoreRetvalV2: - Opcode = NVPTXISD::StoreRetvalV4; - break; case NVPTXISD::StoreV2: MemVT = ST->getMemoryVT(); Opcode = NVPTXISD::StoreV4; break; case NVPTXISD::StoreV4: case NVPTXISD::StoreParamV4: - case NVPTXISD::StoreRetvalV4: case NVPTXISD::StoreV8: // PTX doesn't support the next doubling of operands return SDValue(); @@ -5201,12 +5148,6 @@ static SDValue PerformStoreParamCombine(SDNode *N, return PerformStoreCombineHelper(N, DCI, 3, 1); } -static SDValue PerformStoreRetvalCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - // Operands from the 2nd to the last one are the values to be stored - return PerformStoreCombineHelper(N, DCI, 2, 0); -} - /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, @@ -5840,10 +5781,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, case NVPTXISD::LoadV2: case NVPTXISD::LoadV4: return combineUnpackingMovIntoLoad(N, DCI); - case NVPTXISD::StoreRetval: - case NVPTXISD::StoreRetvalV2: - case NVPTXISD::StoreRetvalV4: - return PerformStoreRetvalCombine(N, DCI); case NVPTXISD::StoreParam: case NVPTXISD::StoreParamV2: case NVPTXISD::StoreParamV4: diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 5efdd1582214a..3a8091fecfde1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -86,10 +86,7 @@ enum NodeType : unsigned { StoreParam, StoreParamV2, StoreParamV4, - StoreRetval, - StoreRetvalV2, - StoreRetvalV4, - LAST_MEMORY_OPCODE = StoreRetvalV4, + LAST_MEMORY_OPCODE = StoreParamV4, }; } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index cb0275706ae3e..1a2515b7f66f3 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -2001,9 +2001,6 @@ def SDTStoreParamProfile : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTStoreParamV2Profile : SDTypeProfile<0, 4, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTStoreParamV4Profile : SDTypeProfile<0, 6, [SDTCisInt<0>, SDTCisInt<1>]>; def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; -def SDTStoreRetvalProfile : SDTypeProfile<0, 2, [SDTCisInt<0>]>; -def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>; -def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>; def SDTProxyRegProfile : SDTypeProfile<1, 1, []>; def DeclareParam : @@ -2040,15 +2037,6 @@ def StoreParamV4 : [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>; -def StoreRetval : - SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile, - [SDNPHasChain, SDNPSideEffect]>; -def StoreRetvalV2 : - SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile, - [SDNPHasChain, SDNPSideEffect]>; -def StoreRetvalV4 : - SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile, - [SDNPHasChain, SDNPSideEffect]>; def ProxyReg : SDNode<"NVPTXISD::ProxyReg", SDTProxyRegProfile, [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>; @@ -2123,25 +2111,6 @@ let mayStore = true in { " \t[param$a$b], {{$val1, $val2, $val3, $val4}};", []>; } - - class StoreRetvalInst : - NVPTXInst<(outs), (ins regclass:$val, Offseti32imm:$a), - !strconcat("st.param", opstr, " \t[func_retval0$a], $val;"), - []>; - - class StoreRetvalV2Inst : - NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, Offseti32imm:$a), - !strconcat("st.param.v2", opstr, - " \t[func_retval0$a], {{$val, $val2}};"), - []>; - - class StoreRetvalV4Inst : - NVPTXInst<(outs), - (ins regclass:$val, regclass:$val2, regclass:$val3, - regclass:$val4, Offseti32imm:$a), - !strconcat("st.param.v4", opstr, - " \t[func_retval0$a], {{$val, $val2, $val3, $val4}};"), - []>; } /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns, @@ -2219,20 +2188,6 @@ defm StoreParamV2F64 : StoreParamV2Inst; defm StoreParamV4F32 : StoreParamV4Inst; -def StoreRetvalI64 : StoreRetvalInst; -def StoreRetvalI32 : StoreRetvalInst; -def StoreRetvalI16 : StoreRetvalInst; -def StoreRetvalI8 : StoreRetvalInst; -def StoreRetvalI8TruncI32 : StoreRetvalInst; -def StoreRetvalI8TruncI64 : StoreRetvalInst; -def StoreRetvalV2I64 : StoreRetvalV2Inst; -def StoreRetvalV2I32 : StoreRetvalV2Inst; -def StoreRetvalV2I16 : StoreRetvalV2Inst; -def StoreRetvalV2I8 : StoreRetvalV2Inst; -def StoreRetvalV4I32 : StoreRetvalV4Inst; -def StoreRetvalV4I16 : StoreRetvalV4Inst; -def StoreRetvalV4I8 : StoreRetvalV4Inst; - def DeclareRetMemInst : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size), ".param .align $align .b8 retval0[$size];", diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir index bb36b1df115d1..61c3ba2ee54ab 100644 --- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir +++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir @@ -50,7 +50,7 @@ body: | ; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00 %6 = FADD_rnf32ri %5, float 6.250000e+00 %7 = FMUL_rnf32rr %6, %4 - StoreRetvalI32 %7, 0 + ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101) Return ... --- @@ -76,6 +76,6 @@ body: | ; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000 %6 = FADD_rnf32ri %5, float 0x7FF8000000000000 %7 = FMUL_rnf32rr %6, %4 - StoreRetvalI32 %7, 0 + ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101) Return ... diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll index b7e6e8b85298a..e75dd20e46a60 100644 --- a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll +++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll @@ -14,7 +14,7 @@ define i1 @and_ord(float %a, float %b) { ; CHECK-NEXT: ld.param.b32 %r1, [and_ord_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [and_ord_param_1]; ; CHECK-NEXT: setp.num.f32 %p1, %r1, %r2; -; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %c = fcmp ord float %a, 0.0 @@ -33,7 +33,7 @@ define i1 @or_uno(float %a, float %b) { ; CHECK-NEXT: ld.param.b32 %r1, [or_uno_param_0]; ; CHECK-NEXT: ld.param.b32 %r2, [or_uno_param_1]; ; CHECK-NEXT: setp.nan.f32 %p1, %r1, %r2; -; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: selp.b32 %r3, -1, 0, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %c = fcmp uno float %a, 0.0 diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll index c8dc34e9de2ca..55ce3dfc46539 100644 --- a/llvm/test/CodeGen/NVPTX/atomics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics.ll @@ -424,7 +424,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) { ; CHECK-LABEL: atomicrmw_add_f16_generic( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b16 %rs<5>; +; CHECK-NEXT: .reg .b16 %rs<4>; ; CHECK-NEXT: .reg .b32 %r<20>; ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: @@ -458,8 +458,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) { ; CHECK-NEXT: @%p1 bra $L__BB24_1; ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: shr.u32 %r18, %r5, %r1; -; CHECK-NEXT: cvt.u16.u32 %rs4, %r18; -; CHECK-NEXT: st.param.b16 [func_retval0], %rs4; +; CHECK-NEXT: st.param.b16 [func_retval0], %r18; ; CHECK-NEXT: ret; %ret = atomicrmw fadd ptr %addr, half %val seq_cst ret half %ret diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll index 0dc658757053c..f59f51c3c57d3 100644 --- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll @@ -17,8 +17,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) { ; SM70-LABEL: test_fadd( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<11>; +; SM70-NEXT: .reg .b32 %r<12>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %r1, [test_fadd_param_1]; @@ -32,8 +31,8 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) { ; SM70-NEXT: setp.nan.f32 %p1, %r5, %r5; ; SM70-NEXT: or.b32 %r9, %r5, 4194304; ; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r10; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r11, %r10, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r11; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fadd( @@ -81,8 +80,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) { ; SM70-LABEL: test_fsub( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<11>; +; SM70-NEXT: .reg .b32 %r<12>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %r1, [test_fsub_param_1]; @@ -96,8 +94,8 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) { ; SM70-NEXT: setp.nan.f32 %p1, %r5, %r5; ; SM70-NEXT: or.b32 %r9, %r5, 4194304; ; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r10; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r11, %r10, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r11; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fsub( @@ -551,8 +549,7 @@ define bfloat @test_fptrunc_float(float %a) #0 { ; SM70-LABEL: test_fptrunc_float( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<7>; +; SM70-NEXT: .reg .b32 %r<8>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b32 %r1, [test_fptrunc_float_param_0]; @@ -562,8 +559,8 @@ define bfloat @test_fptrunc_float(float %a) #0 { ; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1; ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r6; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r7, %r6, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r7; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fptrunc_float( @@ -606,8 +603,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 { ; SM70-LABEL: test_fadd_imm_1( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<9>; +; SM70-NEXT: .reg .b32 %r<10>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %r1, [test_fadd_imm_1_param_0]; @@ -619,8 +615,8 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 { ; SM70-NEXT: setp.nan.f32 %p1, %r3, %r3; ; SM70-NEXT: or.b32 %r7, %r3, 4194304; ; SM70-NEXT: selp.b32 %r8, %r7, %r6, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r8; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r9, %r8, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r9; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_fadd_imm_1( @@ -692,10 +688,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM70-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r1; -; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r2; -; SM70-NEXT: mov.b32 {%rs5, %rs6}, %r3; -; SM70-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; SM70-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; SM70-NEXT: mov.b32 {%rs3, %rs4}, %r4; +; SM70-NEXT: mov.b32 {%rs5, %rs6}, %r1; +; SM70-NEXT: mov.b32 {%rs7, %rs8}, %r2; ; SM70-NEXT: cvt.u32.u16 %r5, %rs8; ; SM70-NEXT: shl.b32 %r6, %r5, 16; ; SM70-NEXT: cvt.u32.u16 %r7, %rs7; @@ -712,8 +708,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM70-NEXT: shl.b32 %r18, %r17, 16; ; SM70-NEXT: cvt.u32.u16 %r19, %rs1; ; SM70-NEXT: shl.b32 %r20, %r19, 16; -; SM70-NEXT: st.param.v4.b32 [func_retval0], {%r20, %r18, %r16, %r14}; -; SM70-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r10, %r8, %r6}; +; SM70-NEXT: st.param.v4.b32 [func_retval0+16], {%r20, %r18, %r16, %r14}; +; SM70-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r10, %r8, %r6}; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_extload_bf16x8( @@ -725,10 +721,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-NEXT: // %bb.0: ; SM80-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM80-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1; -; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r2; -; SM80-NEXT: mov.b32 {%rs5, %rs6}, %r3; -; SM80-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r4; +; SM80-NEXT: mov.b32 {%rs5, %rs6}, %r1; +; SM80-NEXT: mov.b32 {%rs7, %rs8}, %r2; ; SM80-NEXT: cvt.f32.bf16 %r5, %rs8; ; SM80-NEXT: cvt.f32.bf16 %r6, %rs7; ; SM80-NEXT: cvt.f32.bf16 %r7, %rs6; @@ -737,8 +733,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-NEXT: cvt.f32.bf16 %r10, %rs3; ; SM80-NEXT: cvt.f32.bf16 %r11, %rs2; ; SM80-NEXT: cvt.f32.bf16 %r12, %rs1; -; SM80-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; -; SM80-NEXT: st.param.v4.b32 [func_retval0+16], {%r8, %r7, %r6, %r5}; +; SM80-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9}; +; SM80-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; ; SM80-NEXT: ret; ; ; SM80-FTZ-LABEL: test_extload_bf16x8( @@ -750,10 +746,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-FTZ-NEXT: // %bb.0: ; SM80-FTZ-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM80-FTZ-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r1; -; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r2; -; SM80-FTZ-NEXT: mov.b32 {%rs5, %rs6}, %r3; -; SM80-FTZ-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r4; +; SM80-FTZ-NEXT: mov.b32 {%rs5, %rs6}, %r1; +; SM80-FTZ-NEXT: mov.b32 {%rs7, %rs8}, %r2; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r5, %rs8; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r6, %rs7; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r7, %rs6; @@ -762,8 +758,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r10, %rs3; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r11, %rs2; ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %r12, %rs1; -; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; -; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0+16], {%r8, %r7, %r6, %r5}; +; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9}; +; SM80-FTZ-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; ; SM80-FTZ-NEXT: ret; ; ; SM90-LABEL: test_extload_bf16x8( @@ -775,10 +771,10 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM90-NEXT: // %bb.0: ; SM90-NEXT: ld.param.b64 %rd1, [test_extload_bf16x8_param_0]; ; SM90-NEXT: ld.shared.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1]; -; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r1; -; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r2; -; SM90-NEXT: mov.b32 {%rs5, %rs6}, %r3; -; SM90-NEXT: mov.b32 {%rs7, %rs8}, %r4; +; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r4; +; SM90-NEXT: mov.b32 {%rs5, %rs6}, %r1; +; SM90-NEXT: mov.b32 {%rs7, %rs8}, %r2; ; SM90-NEXT: cvt.f32.bf16 %r5, %rs8; ; SM90-NEXT: cvt.f32.bf16 %r6, %rs7; ; SM90-NEXT: cvt.f32.bf16 %r7, %rs6; @@ -787,8 +783,8 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 { ; SM90-NEXT: cvt.f32.bf16 %r10, %rs3; ; SM90-NEXT: cvt.f32.bf16 %r11, %rs2; ; SM90-NEXT: cvt.f32.bf16 %r12, %rs1; -; SM90-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; -; SM90-NEXT: st.param.v4.b32 [func_retval0+16], {%r8, %r7, %r6, %r5}; +; SM90-NEXT: st.param.v4.b32 [func_retval0+16], {%r12, %r11, %r10, %r9}; +; SM90-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; ; SM90-NEXT: ret; %load = load <8 x bfloat>, ptr addrspace(3) %arg, align 16 %res = fpext <8 x bfloat> %load to <8 x float> @@ -909,8 +905,8 @@ define bfloat @test_sitofp_i16(i16 %a) { ; SM70-LABEL: test_sitofp_i16( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<3>; -; SM70-NEXT: .reg .b32 %r<7>; +; SM70-NEXT: .reg .b16 %rs<2>; +; SM70-NEXT: .reg .b32 %r<8>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %rs1, [test_sitofp_i16_param_0]; @@ -921,8 +917,8 @@ define bfloat @test_sitofp_i16(i16 %a) { ; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1; ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; -; SM70-NEXT: mov.b32 {_, %rs2}, %r6; -; SM70-NEXT: st.param.b16 [func_retval0], %rs2; +; SM70-NEXT: shr.u32 %r7, %r6, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r7; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_sitofp_i16( @@ -966,8 +962,8 @@ define bfloat @test_uitofp_i8(i8 %a) { ; SM70-LABEL: test_uitofp_i8( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<3>; -; SM70-NEXT: .reg .b32 %r<7>; +; SM70-NEXT: .reg .b16 %rs<2>; +; SM70-NEXT: .reg .b32 %r<8>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b8 %rs1, [test_uitofp_i8_param_0]; @@ -978,8 +974,8 @@ define bfloat @test_uitofp_i8(i8 %a) { ; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1; ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; -; SM70-NEXT: mov.b32 {_, %rs2}, %r6; -; SM70-NEXT: st.param.b16 [func_retval0], %rs2; +; SM70-NEXT: shr.u32 %r7, %r6, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r7; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i8( @@ -1023,8 +1019,8 @@ define bfloat @test_uitofp_i1(i1 %a) { ; SM70-LABEL: test_uitofp_i1( ; SM70: { ; SM70-NEXT: .reg .pred %p<3>; -; SM70-NEXT: .reg .b16 %rs<4>; -; SM70-NEXT: .reg .b32 %r<8>; +; SM70-NEXT: .reg .b16 %rs<3>; +; SM70-NEXT: .reg .b32 %r<9>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b8 %rs1, [test_uitofp_i1_param_0]; @@ -1038,8 +1034,8 @@ define bfloat @test_uitofp_i1(i1 %a) { ; SM70-NEXT: setp.nan.f32 %p2, %r2, %r2; ; SM70-NEXT: or.b32 %r6, %r2, 4194304; ; SM70-NEXT: selp.b32 %r7, %r6, %r5, %p2; -; SM70-NEXT: mov.b32 {_, %rs3}, %r7; -; SM70-NEXT: st.param.b16 [func_retval0], %rs3; +; SM70-NEXT: shr.u32 %r8, %r7, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r8; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i1( @@ -1096,8 +1092,8 @@ define bfloat @test_uitofp_i16(i16 %a) { ; SM70-LABEL: test_uitofp_i16( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<3>; -; SM70-NEXT: .reg .b32 %r<7>; +; SM70-NEXT: .reg .b16 %rs<2>; +; SM70-NEXT: .reg .b32 %r<8>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %rs1, [test_uitofp_i16_param_0]; @@ -1108,8 +1104,8 @@ define bfloat @test_uitofp_i16(i16 %a) { ; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1; ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; -; SM70-NEXT: mov.b32 {_, %rs2}, %r6; -; SM70-NEXT: st.param.b16 [func_retval0], %rs2; +; SM70-NEXT: shr.u32 %r7, %r6, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r7; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i16( @@ -1153,8 +1149,7 @@ define bfloat @test_uitofp_i32(i32 %a) { ; SM70-LABEL: test_uitofp_i32( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<8>; +; SM70-NEXT: .reg .b32 %r<9>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b32 %r1, [test_uitofp_i32_param_0]; @@ -1165,8 +1160,8 @@ define bfloat @test_uitofp_i32(i32 %a) { ; SM70-NEXT: setp.nan.f32 %p1, %r2, %r2; ; SM70-NEXT: or.b32 %r6, %r2, 4194304; ; SM70-NEXT: selp.b32 %r7, %r6, %r5, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r7; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r8, %r7, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r8; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i32( @@ -1211,8 +1206,7 @@ define bfloat @test_uitofp_i64(i64 %a) { ; SM70-LABEL: test_uitofp_i64( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<7>; +; SM70-NEXT: .reg .b32 %r<8>; ; SM70-NEXT: .reg .b64 %rd<2>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: @@ -1224,8 +1218,8 @@ define bfloat @test_uitofp_i64(i64 %a) { ; SM70-NEXT: setp.nan.f32 %p1, %r1, %r1; ; SM70-NEXT: or.b32 %r5, %r1, 4194304; ; SM70-NEXT: selp.b32 %r6, %r5, %r4, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r6; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r7, %r6, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r7; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_uitofp_i64( @@ -1272,8 +1266,7 @@ define bfloat @test_roundeven(bfloat %a) { ; SM70-LABEL: test_roundeven( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<9>; +; SM70-NEXT: .reg .b32 %r<10>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %r1, [test_roundeven_param_0]; @@ -1285,8 +1278,8 @@ define bfloat @test_roundeven(bfloat %a) { ; SM70-NEXT: setp.nan.f32 %p1, %r3, %r3; ; SM70-NEXT: or.b32 %r7, %r3, 4194304; ; SM70-NEXT: selp.b32 %r8, %r7, %r6, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r8; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r9, %r8, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r9; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_roundeven( @@ -1397,8 +1390,7 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { ; SM70-LABEL: test_maxnum( ; SM70: { ; SM70-NEXT: .reg .pred %p<2>; -; SM70-NEXT: .reg .b16 %rs<2>; -; SM70-NEXT: .reg .b32 %r<11>; +; SM70-NEXT: .reg .b32 %r<12>; ; SM70-EMPTY: ; SM70-NEXT: // %bb.0: ; SM70-NEXT: ld.param.b16 %r1, [test_maxnum_param_1]; @@ -1412,8 +1404,8 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) { ; SM70-NEXT: setp.nan.f32 %p1, %r5, %r5; ; SM70-NEXT: or.b32 %r9, %r5, 4194304; ; SM70-NEXT: selp.b32 %r10, %r9, %r8, %p1; -; SM70-NEXT: mov.b32 {_, %rs1}, %r10; -; SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; SM70-NEXT: shr.u32 %r11, %r10, 16; +; SM70-NEXT: st.param.b16 [func_retval0], %r11; ; SM70-NEXT: ret; ; ; SM80-LABEL: test_maxnum( diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index 15de26ff4df30..ba5813c869236 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -9,11 +9,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define <2 x bfloat> @test_ret_const() #0 { ; CHECK-LABEL: test_ret_const( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 1073758080; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 1073758080; ; CHECK-NEXT: ret; ret <2 x bfloat> } diff --git a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll index aa08b9605790c..c5a995ae47bc1 100644 --- a/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll +++ b/llvm/test/CodeGen/NVPTX/compute-ptx-value-vts.ll @@ -6,12 +6,11 @@ target triple = "nvptx-nvidia-cuda" define <6 x half> @half6() { ; CHECK-LABEL: half6( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0x0000; -; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0+8], 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <6 x half> zeroinitializer } @@ -19,13 +18,12 @@ define <6 x half> @half6() { define <10 x half> @half10() { ; CHECK-LABEL: half10( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0x0000; -; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b16 [func_retval0+16], {%rs1, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0+16], 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+8], {0, 0}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <10 x half> zeroinitializer } @@ -33,13 +31,11 @@ define <10 x half> @half10() { define <12 x i8> @byte12() { ; CHECK-LABEL: byte12( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0+8], 0; +; CHECK-NEXT: st.param.b64 [func_retval0], 0; ; CHECK-NEXT: ret; ret <12 x i8> zeroinitializer } @@ -47,15 +43,12 @@ define <12 x i8> @byte12() { define <20 x i8> @byte20() { ; CHECK-LABEL: byte20( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+16], {%rs1, %rs1, %rs1, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0+16], 0; +; CHECK-NEXT: st.param.b64 [func_retval0+8], 0; +; CHECK-NEXT: st.param.b64 [func_retval0], 0; ; CHECK-NEXT: ret; ret <20 x i8> zeroinitializer } diff --git a/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll index 5e856112c0142..f8bfcd5531502 100644 --- a/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll +++ b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll @@ -100,14 +100,13 @@ define i8 @cvt_s8_f32(float %x) { ; CHECK-LABEL: cvt_s8_f32( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [cvt_s8_f32_param_0]; ; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %r1; ; CHECK-NEXT: cvt.u32.u16 %r2, %rs1; -; CHECK-NEXT: and.b32 %r3, %r2, 255; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %a = fptosi float %x to i8 ret i8 %a @@ -117,15 +116,14 @@ define i8 @cvt_s8_f64(double %x) { ; CHECK-LABEL: cvt_s8_f64( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [cvt_s8_f64_param_0]; ; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %rd1; ; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; -; CHECK-NEXT: and.b32 %r2, %r1, 255; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %a = fptosi double %x to i8 ret i8 %a diff --git a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll index ce6a16d9c0400..8a0c0f8c3b452 100644 --- a/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll +++ b/llvm/test/CodeGen/NVPTX/convert-int-sm20.ll @@ -1,66 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %} ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} - ;; Integer conversions happen inplicitly by loading/storing the proper types - ; i16 define i16 @cvt_i16_i32(i32 %x) { -; CHECK: ld.param.b16 %r[[R0:[0-9]+]], [cvt_i16_i32_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] -; CHECK: ret +; CHECK-LABEL: cvt_i16_i32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [cvt_i16_i32_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %a = trunc i32 %x to i16 ret i16 %a } define i16 @cvt_i16_i64(i64 %x) { -; CHECK: ld.param.b16 %r[[R0:[0-9]+]], [cvt_i16_i64_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] -; CHECK: ret +; CHECK-LABEL: cvt_i16_i64( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [cvt_i16_i64_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: ret; %a = trunc i64 %x to i16 ret i16 %a } - - ; i32 define i32 @cvt_i32_i16(i16 %x) { -; CHECK: ld.param.b16 %r[[R0:[0-9]+]], [cvt_i32_i16_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] -; CHECK: ret +; CHECK-LABEL: cvt_i32_i16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %r1, [cvt_i32_i16_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %a = zext i16 %x to i32 ret i32 %a } define i32 @cvt_i32_i64(i64 %x) { -; CHECK: ld.param.b32 %r[[R0:[0-9]+]], [cvt_i32_i64_param_{{[0-9]+}}] -; CHECK: st.param.b32 [func_retval{{[0-9]+}}], %r[[R0]] -; CHECK: ret +; CHECK-LABEL: cvt_i32_i64( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [cvt_i32_i64_param_0]; +; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: ret; %a = trunc i64 %x to i32 ret i32 %a } - - ; i64 define i64 @cvt_i64_i16(i16 %x) { -; CHECK: ld.param.b16 %rd[[R0:[0-9]+]], [cvt_i64_i16_param_{{[0-9]+}}] -; CHECK: st.param.b64 [func_retval{{[0-9]+}}], %rd[[R0]] -; CHECK: ret +; CHECK-LABEL: cvt_i64_i16( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rd1, [cvt_i64_i16_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; %a = zext i16 %x to i64 ret i64 %a } define i64 @cvt_i64_i32(i32 %x) { -; CHECK: ld.param.b32 %rd[[R0:[0-9]+]], [cvt_i64_i32_param_{{[0-9]+}}] -; CHECK: st.param.b64 [func_retval{{[0-9]+}}], %rd[[R0]] -; CHECK: ret +; CHECK-LABEL: cvt_i64_i32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %rd1, [cvt_i64_i32_param_0]; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; %a = zext i32 %x to i64 ret i64 %a } diff --git a/llvm/test/CodeGen/NVPTX/elect.ll b/llvm/test/CodeGen/NVPTX/elect.ll index 93c30a9b00068..b65fa5a6376ef 100644 --- a/llvm/test/CodeGen/NVPTX/elect.ll +++ b/llvm/test/CodeGen/NVPTX/elect.ll @@ -54,9 +54,9 @@ define {i32, i1} @elect_sync_twice(i32 %mask) { ; CHECK-NEXT: ld.param.b32 %r1, [elect_sync_twice_param_0]; ; CHECK-NEXT: elect.sync %r2|%p1, %r1; ; CHECK-NEXT: elect.sync %r3|%p2, %r1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p1; ; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %val = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask) %val2 = call {i32, i1} @llvm.nvvm.elect.sync(i32 %mask) diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll index 79d80e6f8fa84..b1eadf381d3b4 100644 --- a/llvm/test/CodeGen/NVPTX/extractelement.ll +++ b/llvm/test/CodeGen/NVPTX/extractelement.ll @@ -41,7 +41,7 @@ define i1 @test_v2i8_load(ptr %a) { ; CHECK-NEXT: or.b16 %rs5, %rs1, %rs2; ; CHECK-NEXT: and.b16 %rs6, %rs5, 255; ; CHECK-NEXT: setp.eq.s16 %p1, %rs6, 0; -; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %v = load <2 x i8>, ptr %a, align 4 diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll index 252edf4b02c76..13f1c2f30b830 100644 --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -43,8 +43,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( -; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00; -; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; +; CHECK: st.param.b16 [func_retval0], 0x3C00; ; CHECK-NEXT: ret; define half @test_ret_const() #0 { ret half 1.0 @@ -384,7 +383,7 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_une(half %a, half %b) #0 { @@ -400,7 +399,7 @@ define i1 @test_fcmp_une(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.equ.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ueq(half %a, half %b) #0 { @@ -416,7 +415,7 @@ define i1 @test_fcmp_ueq(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.gtu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ugt(half %a, half %b) #0 { @@ -432,7 +431,7 @@ define i1 @test_fcmp_ugt(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.geu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_uge(half %a, half %b) #0 { @@ -448,7 +447,7 @@ define i1 @test_fcmp_uge(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.ltu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ult(half %a, half %b) #0 { @@ -464,7 +463,7 @@ define i1 @test_fcmp_ult(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.leu.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ule(half %a, half %b) #0 { @@ -481,7 +480,7 @@ define i1 @test_fcmp_ule(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.nan.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_uno(half %a, half %b) #0 { @@ -497,7 +496,7 @@ define i1 @test_fcmp_uno(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.ne.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_one(half %a, half %b) #0 { @@ -513,7 +512,7 @@ define i1 @test_fcmp_one(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.eq.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_oeq(half %a, half %b) #0 { @@ -529,7 +528,7 @@ define i1 @test_fcmp_oeq(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.gt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ogt(half %a, half %b) #0 { @@ -545,7 +544,7 @@ define i1 @test_fcmp_ogt(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.ge.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_oge(half %a, half %b) #0 { @@ -561,7 +560,7 @@ define i1 @test_fcmp_oge(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.lt.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_olt(half %a, half %b) #0 { @@ -577,7 +576,7 @@ define i1 @test_fcmp_olt(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.le.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ole(half %a, half %b) #0 { @@ -593,7 +592,7 @@ define i1 @test_fcmp_ole(half %a, half %b) #0 { ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%r[0-9]+]], [[B]]; ; CHECK-NOF16: setp.num.f32 [[PRED:%p[0-9]+]], [[AF]], [[BF]] -; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], 1, 0, [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], -1, 0, [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0], [[R]]; ; CHECK-NEXT: ret; define i1 @test_fcmp_ord(half %a, half %b) #0 { diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 2b2fed153d823..43a605f2b34d7 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -34,11 +34,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define <2 x half> @test_ret_const() #0 { ; CHECK-LABEL: test_ret_const( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 1073757184; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 1073757184; ; CHECK-NEXT: ret; ret <2 x half> } diff --git a/llvm/test/CodeGen/NVPTX/fexp2.ll b/llvm/test/CodeGen/NVPTX/fexp2.ll index 7c5e2f83d62c8..391aa453f0757 100644 --- a/llvm/test/CodeGen/NVPTX/fexp2.ll +++ b/llvm/test/CodeGen/NVPTX/fexp2.ll @@ -252,8 +252,7 @@ define bfloat @exp2_bf16_test(bfloat %in) { ; CHECK-LABEL: exp2_bf16_test( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: ld.param.b16 %r1, [exp2_bf16_test_param_0]; @@ -265,15 +264,14 @@ define bfloat @exp2_bf16_test(bfloat %in) { ; CHECK-NEXT: setp.nan.f32 %p1, %r3, %r3; ; CHECK-NEXT: or.b32 %r7, %r3, 4194304; ; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1; -; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; } -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-NEXT: shr.u32 %r9, %r8, 16; +; CHECK-NEXT: st.param.b16 [func_retval0], %r9; ; CHECK-NEXT: ret; ; ; CHECK-FP16-LABEL: exp2_bf16_test( ; CHECK-FP16: { ; CHECK-FP16-NEXT: .reg .pred %p<2>; -; CHECK-FP16-NEXT: .reg .b16 %rs<2>; -; CHECK-FP16-NEXT: .reg .b32 %r<9>; +; CHECK-FP16-NEXT: .reg .b32 %r<10>; ; CHECK-FP16-EMPTY: ; CHECK-FP16-NEXT: // %bb.0: // %entry ; CHECK-FP16-NEXT: ld.param.b16 %r1, [exp2_bf16_test_param_0]; @@ -285,8 +283,8 @@ define bfloat @exp2_bf16_test(bfloat %in) { ; CHECK-FP16-NEXT: setp.nan.f32 %p1, %r3, %r3; ; CHECK-FP16-NEXT: or.b32 %r7, %r3, 4194304; ; CHECK-FP16-NEXT: selp.b32 %r8, %r7, %r6, %p1; -; CHECK-FP16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; } -; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-FP16-NEXT: shr.u32 %r9, %r8, 16; +; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %r9; ; CHECK-FP16-NEXT: ret; ; ; CHECK-BF16-LABEL: exp2_bf16_test( diff --git a/llvm/test/CodeGen/NVPTX/flog2.ll b/llvm/test/CodeGen/NVPTX/flog2.ll index 890c2f8a2678d..acac5a8da4e14 100644 --- a/llvm/test/CodeGen/NVPTX/flog2.ll +++ b/llvm/test/CodeGen/NVPTX/flog2.ll @@ -122,8 +122,7 @@ define bfloat @log2_bf16_test(bfloat %in) { ; CHECK-LABEL: log2_bf16_test( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: ld.param.b16 %r1, [log2_bf16_test_param_0]; @@ -135,8 +134,8 @@ define bfloat @log2_bf16_test(bfloat %in) { ; CHECK-NEXT: setp.nan.f32 %p1, %r3, %r3; ; CHECK-NEXT: or.b32 %r7, %r3, 4194304; ; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1; -; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; } -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-NEXT: shr.u32 %r9, %r8, 16; +; CHECK-NEXT: st.param.b16 [func_retval0], %r9; ; CHECK-NEXT: ret; entry: %log2 = call bfloat @llvm.log2.bf16(bfloat %in) @@ -148,8 +147,7 @@ define bfloat @log2_bf16_ftz_test(bfloat %in) #0 { ; CHECK-LABEL: log2_bf16_ftz_test( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<2>; -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: ld.param.b16 %r1, [log2_bf16_ftz_test_param_0]; @@ -161,8 +159,8 @@ define bfloat @log2_bf16_ftz_test(bfloat %in) #0 { ; CHECK-NEXT: setp.nan.ftz.f32 %p1, %r3, %r3; ; CHECK-NEXT: or.b32 %r7, %r3, 4194304; ; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1; -; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; } -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-NEXT: shr.u32 %r9, %r8, 16; +; CHECK-NEXT: st.param.b16 [func_retval0], %r9; ; CHECK-NEXT: ret; entry: %log2 = call bfloat @llvm.log2.bf16(bfloat %in) diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll index 42160b50348c4..92293ab171a12 100644 --- a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll +++ b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll @@ -384,8 +384,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat ; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<5>; -; CHECK-SM70-NEXT: .reg .b16 %rs<4>; -; CHECK-SM70-NEXT: .reg .b32 %r<29>; +; CHECK-SM70-NEXT: .reg .b16 %rs<3>; +; CHECK-SM70-NEXT: .reg .b32 %r<30>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2]; @@ -422,8 +422,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat ; CHECK-SM70-NEXT: setp.nan.f32 %p4, %r23, %r23; ; CHECK-SM70-NEXT: or.b32 %r27, %r23, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r28, %r27, %r26, %p4; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r28; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-SM70-NEXT: shr.u32 %r29, %r28, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r29; ; CHECK-SM70-NEXT: ret; %1 = fmul bfloat %a, %b %2 = fadd bfloat %1, %c @@ -462,8 +462,7 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) ; CHECK-SM70-LABEL: fma_bf16_expanded_maxnum_no_nans( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<3>; -; CHECK-SM70-NEXT: .reg .b16 %rs<2>; -; CHECK-SM70-NEXT: .reg .b32 %r<20>; +; CHECK-SM70-NEXT: .reg .b32 %r<21>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_maxnum_no_nans_param_2]; @@ -487,8 +486,8 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) ; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14; ; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20; ; CHECK-SM70-NEXT: ret; %1 = fmul bfloat %a, %b %2 = fadd bfloat %1, %c diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll index 6f6c5d7340789..2f1d7d6321438 100644 --- a/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll +++ b/llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll @@ -253,8 +253,7 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa ; CHECK-SM70-LABEL: fma_bf16_no_nans_multiple_uses_of_fma( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<4>; -; CHECK-SM70-NEXT: .reg .b16 %rs<2>; -; CHECK-SM70-NEXT: .reg .b32 %r<27>; +; CHECK-SM70-NEXT: .reg .b32 %r<28>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_no_nans_multiple_uses_of_fma_param_2]; @@ -286,8 +285,8 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa ; CHECK-SM70-NEXT: setp.nan.f32 %p3, %r21, %r21; ; CHECK-SM70-NEXT: or.b32 %r25, %r21, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r26, %r25, %r24, %p3; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r26; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-SM70-NEXT: shr.u32 %r27, %r26, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r27; ; CHECK-SM70-NEXT: ret; %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) %2 = fcmp ogt bfloat %1, 0.0 @@ -325,8 +324,7 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 { ; CHECK-SM70-LABEL: fma_bf16_maxnum_no_nans( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<3>; -; CHECK-SM70-NEXT: .reg .b16 %rs<2>; -; CHECK-SM70-NEXT: .reg .b32 %r<20>; +; CHECK-SM70-NEXT: .reg .b32 %r<21>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_maxnum_no_nans_param_2]; @@ -350,8 +348,8 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 { ; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14; ; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20; ; CHECK-SM70-NEXT: ret; %1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) %2 = call bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0) diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll b/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll index 60bfe3fa2cbf6..090dd00f1b933 100644 --- a/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll +++ b/llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll @@ -271,8 +271,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat ; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<5>; -; CHECK-SM70-NEXT: .reg .b16 %rs<4>; -; CHECK-SM70-NEXT: .reg .b32 %r<29>; +; CHECK-SM70-NEXT: .reg .b16 %rs<3>; +; CHECK-SM70-NEXT: .reg .b32 %r<30>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2]; @@ -309,8 +309,8 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat ; CHECK-SM70-NEXT: setp.nan.f32 %p4, %r23, %r23; ; CHECK-SM70-NEXT: or.b32 %r27, %r23, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r28, %r27, %r26, %p4; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs3}, %r28; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-SM70-NEXT: shr.u32 %r29, %r28, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r29; ; CHECK-SM70-NEXT: ret; %1 = fmul fast bfloat %a, %b %2 = fadd fast bfloat %1, %c @@ -351,8 +351,7 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) ; CHECK-SM70-LABEL: fma_bf16_expanded_maxnum_no_nans( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<3>; -; CHECK-SM70-NEXT: .reg .b16 %rs<2>; -; CHECK-SM70-NEXT: .reg .b32 %r<20>; +; CHECK-SM70-NEXT: .reg .b32 %r<21>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_expanded_maxnum_no_nans_param_2]; @@ -376,8 +375,8 @@ define bfloat @fma_bf16_expanded_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) ; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14; ; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20; ; CHECK-SM70-NEXT: ret; %1 = fmul fast bfloat %a, %b %2 = fadd fast bfloat %1, %c @@ -1111,8 +1110,7 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa ; CHECK-SM70-LABEL: fma_bf16_no_nans_multiple_uses_of_fma( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<4>; -; CHECK-SM70-NEXT: .reg .b16 %rs<2>; -; CHECK-SM70-NEXT: .reg .b32 %r<27>; +; CHECK-SM70-NEXT: .reg .b32 %r<28>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_no_nans_multiple_uses_of_fma_param_2]; @@ -1144,8 +1142,8 @@ define bfloat @fma_bf16_no_nans_multiple_uses_of_fma(bfloat %a, bfloat %b, bfloa ; CHECK-SM70-NEXT: setp.nan.f32 %p3, %r21, %r21; ; CHECK-SM70-NEXT: or.b32 %r25, %r21, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r26, %r25, %r24, %p3; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r26; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-SM70-NEXT: shr.u32 %r27, %r26, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r27; ; CHECK-SM70-NEXT: ret; %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) %2 = fcmp nsz ogt bfloat %1, 0.0 @@ -1183,8 +1181,7 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) { ; CHECK-SM70-LABEL: fma_bf16_maxnum_no_nans( ; CHECK-SM70: { ; CHECK-SM70-NEXT: .reg .pred %p<3>; -; CHECK-SM70-NEXT: .reg .b16 %rs<2>; -; CHECK-SM70-NEXT: .reg .b32 %r<20>; +; CHECK-SM70-NEXT: .reg .b32 %r<21>; ; CHECK-SM70-EMPTY: ; CHECK-SM70-NEXT: // %bb.0: ; CHECK-SM70-NEXT: ld.param.b16 %r1, [fma_bf16_maxnum_no_nans_param_2]; @@ -1208,8 +1205,8 @@ define bfloat @fma_bf16_maxnum_no_nans(bfloat %a, bfloat %b, bfloat %c) { ; CHECK-SM70-NEXT: setp.nan.f32 %p2, %r14, %r14; ; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304; ; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2; -; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; } -; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-SM70-NEXT: shr.u32 %r20, %r19, 16; +; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %r20; ; CHECK-SM70-NEXT: ret; %1 = call nnan bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c) %2 = call nsz bfloat @llvm.maxnum.bf16(bfloat %1, bfloat 0.0) diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll index b74e531adba3f..5aa12b08a3812 100644 --- a/llvm/test/CodeGen/NVPTX/fma.ll +++ b/llvm/test/CodeGen/NVPTX/fma.ll @@ -116,11 +116,10 @@ define ptx_device float @f32_iir(float %x) { define ptx_device float @f32_iii(float %x) { ; CHECK-LABEL: f32_iii( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0f41200000; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 1092616192; ; CHECK-NEXT: ret; %r = call float @llvm.fma.f32(float 2.0, float 3.0, float 4.0) ret float %r diff --git a/llvm/test/CodeGen/NVPTX/i1-icmp.ll b/llvm/test/CodeGen/NVPTX/i1-icmp.ll index e43a9da88a50e..5d5e861432b25 100644 --- a/llvm/test/CodeGen/NVPTX/i1-icmp.ll +++ b/llvm/test/CodeGen/NVPTX/i1-icmp.ll @@ -8,7 +8,7 @@ define i32 @icmp_i1_eq(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_eq( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_eq_param_0]; @@ -18,12 +18,10 @@ define i32 @icmp_i1_eq(i32 %a, i32 %b) { ; CHECK-NEXT: xor.pred %p3, %p1, %p2; ; CHECK-NEXT: @%p3 bra $L__BB0_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB0_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -39,7 +37,7 @@ define i32 @icmp_i1_ne(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_ne( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<5>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ne_param_0]; @@ -50,12 +48,10 @@ define i32 @icmp_i1_ne(i32 %a, i32 %b) { ; CHECK-NEXT: not.pred %p4, %p3; ; CHECK-NEXT: @%p4 bra $L__BB1_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -71,7 +67,7 @@ define i32 @icmp_i1_sgt(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_sgt( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_sgt_param_0]; @@ -81,12 +77,10 @@ define i32 @icmp_i1_sgt(i32 %a, i32 %b) { ; CHECK-NEXT: or.pred %p3, %p1, %p2; ; CHECK-NEXT: @%p3 bra $L__BB2_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB2_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -102,7 +96,7 @@ define i32 @icmp_i1_slt(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_slt( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_slt_param_0]; @@ -112,12 +106,10 @@ define i32 @icmp_i1_slt(i32 %a, i32 %b) { ; CHECK-NEXT: or.pred %p3, %p2, %p1; ; CHECK-NEXT: @%p3 bra $L__BB3_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB3_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -133,7 +125,7 @@ define i32 @icmp_i1_sge(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_sge( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_sge_param_0]; @@ -143,12 +135,10 @@ define i32 @icmp_i1_sge(i32 %a, i32 %b) { ; CHECK-NEXT: and.pred %p3, %p1, %p2; ; CHECK-NEXT: @%p3 bra $L__BB4_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB4_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -164,7 +154,7 @@ define i32 @icmp_i1_sle(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_sle( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_sle_param_0]; @@ -174,12 +164,10 @@ define i32 @icmp_i1_sle(i32 %a, i32 %b) { ; CHECK-NEXT: and.pred %p3, %p2, %p1; ; CHECK-NEXT: @%p3 bra $L__BB5_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB5_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -195,7 +183,7 @@ define i32 @icmp_i1_uge(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_uge( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_uge_param_0]; @@ -205,12 +193,10 @@ define i32 @icmp_i1_uge(i32 %a, i32 %b) { ; CHECK-NEXT: and.pred %p3, %p2, %p1; ; CHECK-NEXT: @%p3 bra $L__BB6_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB6_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -226,7 +212,7 @@ define i32 @icmp_i1_ugt(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_ugt( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ugt_param_0]; @@ -236,12 +222,10 @@ define i32 @icmp_i1_ugt(i32 %a, i32 %b) { ; CHECK-NEXT: or.pred %p3, %p2, %p1; ; CHECK-NEXT: @%p3 bra $L__BB7_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB7_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -257,7 +241,7 @@ define i32 @icmp_i1_ule(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_ule( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ule_param_0]; @@ -267,12 +251,10 @@ define i32 @icmp_i1_ule(i32 %a, i32 %b) { ; CHECK-NEXT: and.pred %p3, %p1, %p2; ; CHECK-NEXT: @%p3 bra $L__BB8_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB8_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 @@ -288,7 +270,7 @@ define i32 @icmp_i1_ult(i32 %a, i32 %b) { ; CHECK-LABEL: icmp_i1_ult( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<4>; -; CHECK-NEXT: .reg .b32 %r<5>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [icmp_i1_ult_param_0]; @@ -298,12 +280,10 @@ define i32 @icmp_i1_ult(i32 %a, i32 %b) { ; CHECK-NEXT: or.pred %p3, %p1, %p2; ; CHECK-NEXT: @%p3 bra $L__BB9_2; ; CHECK-NEXT: // %bb.1: // %bb1 -; CHECK-NEXT: mov.b32 %r4, 1; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 1; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB9_2: // %bb2 -; CHECK-NEXT: mov.b32 %r3, 127; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 127; ; CHECK-NEXT: ret; %p1 = icmp sgt i32 %a, 1 %p2 = icmp sgt i32 %b, 1 diff --git a/llvm/test/CodeGen/NVPTX/i128-ld-st.ll b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll index abe92a5bf79b9..83f1ed43960fe 100644 --- a/llvm/test/CodeGen/NVPTX/i128-ld-st.ll +++ b/llvm/test/CodeGen/NVPTX/i128-ld-st.ll @@ -7,15 +7,14 @@ target triple = "nvptx64-nvidia-cuda" define i128 @foo(ptr %p, ptr %o) { ; CHECK-LABEL: foo( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd2, [foo_param_1]; ; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0]; ; CHECK-NEXT: ld.b8 %rd3, [%rd1]; ; CHECK-NEXT: st.v2.b64 [%rd2], {%rd3, 0}; -; CHECK-NEXT: mov.b64 %rd4, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, 0}; ; CHECK-NEXT: ret; %c = load i8, ptr %p, align 1 %i = zext i8 %c to i128 diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll index ecd42fd6ceb3c..29408a24213cc 100644 --- a/llvm/test/CodeGen/NVPTX/i128.ll +++ b/llvm/test/CodeGen/NVPTX/i128.ll @@ -292,13 +292,12 @@ define i128 @srem_i128_pow2k(i128 %lhs) { define i128 @urem_i128_pow2k(i128 %lhs) { ; CHECK-LABEL: urem_i128_pow2k( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_pow2k_param_0]; ; CHECK-NEXT: and.b64 %rd3, %rd1, 8589934591; -; CHECK-NEXT: mov.b64 %rd4, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, 0}; ; CHECK-NEXT: ret; %div = urem i128 %lhs, 8589934592 ret i128 %div diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index d5ddadf2b21c5..e89ab7a5605c3 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -23,11 +23,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define <2 x i16> @test_ret_const() #0 { ; COMMON-LABEL: test_ret_const( ; COMMON: { -; COMMON-NEXT: .reg .b32 %r<2>; +; COMMON-EMPTY: ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: -; COMMON-NEXT: mov.b32 %r1, 131073; -; COMMON-NEXT: st.param.b32 [func_retval0], %r1; +; COMMON-NEXT: st.param.b32 [func_retval0], 131073; ; COMMON-NEXT: ret; ret <2 x i16> } @@ -905,9 +904,9 @@ define <2 x i32> @test_zext_2xi32(<2 x i16> %a) #0 { ; COMMON-NEXT: // %bb.0: ; COMMON-NEXT: ld.param.b32 %r1, [test_zext_2xi32_param_0]; ; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1; -; COMMON-NEXT: cvt.u32.u16 %r2, %rs1; -; COMMON-NEXT: cvt.u32.u16 %r3, %rs2; -; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r3}; +; COMMON-NEXT: cvt.u32.u16 %r2, %rs2; +; COMMON-NEXT: cvt.u32.u16 %r3, %rs1; +; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; ; COMMON-NEXT: ret; %r = zext <2 x i16> %a to <2 x i32> ret <2 x i32> %r @@ -960,14 +959,11 @@ define i32 @test_bitcast_2xi16_to_i32(<2 x i16> %a) #0 { define <2 x half> @test_bitcast_2xi16_to_2xhalf(i16 %a) #0 { ; COMMON-LABEL: test_bitcast_2xi16_to_2xhalf( ; COMMON: { -; COMMON-NEXT: .reg .b16 %rs<3>; -; COMMON-NEXT: .reg .b32 %r<2>; +; COMMON-NEXT: .reg .b16 %rs<2>; ; COMMON-EMPTY: ; COMMON-NEXT: // %bb.0: ; COMMON-NEXT: ld.param.b16 %rs1, [test_bitcast_2xi16_to_2xhalf_param_0]; -; COMMON-NEXT: mov.b16 %rs2, 5; -; COMMON-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; COMMON-NEXT: st.param.b32 [func_retval0], %r1; +; COMMON-NEXT: st.param.v2.b16 [func_retval0], {%rs1, 5}; ; COMMON-NEXT: ret; %ins.0 = insertelement <2 x i16> undef, i16 %a, i32 0 %ins.1 = insertelement <2 x i16> %ins.0, i16 5, i32 1 diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll index 718840897c696..3edd4e4da60e0 100644 --- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll @@ -13,16 +13,15 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) { ; CHECK-LABEL: test_bitcast_2xi8_i16( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<6>; +; CHECK-NEXT: .reg .b16 %rs<5>; ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_bitcast_2xi8_i16_param_0]; -; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; +; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0]; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: shl.b16 %rs3, %rs2, 8; -; CHECK-NEXT: and.b16 %rs4, %rs1, 255; -; CHECK-NEXT: or.b16 %rs5, %rs4, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r2, %rs5; +; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r2, %rs4; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %res = bitcast <2 x i8> %a to i16 @@ -32,12 +31,11 @@ define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) { define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) { ; CHECK-LABEL: test_bitcast_i16_2xi8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b16 %rs<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; -; CHECK-NEXT: shr.u16 %rs2, %rs1, 8; -; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs1, %rs2}; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; %res = bitcast i16 %a to <2 x i8> ret <2 x i8> %res diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 72c279bee4268..fd2e56bb126bb 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -14,11 +14,10 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define <4 x i8> @test_ret_const() #0 { ; CHECK-LABEL: test_ret_const( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, -66911489; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], -66911489; ; CHECK-NEXT: ret; ret <4 x i8> } diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll index d41b9b9973351..be84f9bfb1aeb 100644 --- a/llvm/test/CodeGen/NVPTX/idioms.ll +++ b/llvm/test/CodeGen/NVPTX/idioms.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; Check that various LLVM idioms get lowered to NVPTX as expected. ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s @@ -8,114 +9,178 @@ %struct.S16 = type { i16, i16 } %struct.S32 = type { i32, i32 } -; CHECK-LABEL: abs_i16( define i16 @abs_i16(i16 %a) { -; CHECK: abs.s16 +; CHECK-LABEL: abs_i16( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [abs_i16_param_0]; +; CHECK-NEXT: abs.s16 %rs2, %rs1; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %neg = sub i16 0, %a %abs.cond = icmp sge i16 %a, 0 %abs = select i1 %abs.cond, i16 %a, i16 %neg ret i16 %abs } -; CHECK-LABEL: abs_i32( define i32 @abs_i32(i32 %a) { -; CHECK: abs.s32 +; CHECK-LABEL: abs_i32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [abs_i32_param_0]; +; CHECK-NEXT: abs.s32 %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; %neg = sub i32 0, %a %abs.cond = icmp sge i32 %a, 0 %abs = select i1 %abs.cond, i32 %a, i32 %neg ret i32 %abs } -; CHECK-LABEL: abs_i64( define i64 @abs_i64(i64 %a) { -; CHECK: abs.s64 +; CHECK-LABEL: abs_i64( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [abs_i64_param_0]; +; CHECK-NEXT: abs.s64 %rd2, %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %neg = sub i64 0, %a %abs.cond = icmp sge i64 %a, 0 %abs = select i1 %abs.cond, i64 %a, i64 %neg ret i64 %abs } -; CHECK-LABEL: i32_to_2xi16( define %struct.S16 @i32_to_2xi16(i32 noundef %in) { +; CHECK-LABEL: i32_to_2xi16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_param_0]; +; CHECK-NEXT: shr.u32 %r2, %r1, 16; +; CHECK-NEXT: st.param.b16 [func_retval0], %r1; +; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2; +; CHECK-NEXT: ret; %low = trunc i32 %in to i16 %high32 = lshr i32 %in, 16 %high = trunc i32 %high32 to i16 -; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_param_0]; -; CHECK-DAG: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32]]; -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]]; %s1 = insertvalue %struct.S16 poison, i16 %low, 0 %s = insertvalue %struct.S16 %s1, i16 %high, 1 ret %struct.S16 %s } -; CHECK-LABEL: i32_to_2xi16_lh( ; Same as above, but with rearranged order of low/high parts. define %struct.S16 @i32_to_2xi16_lh(i32 noundef %in) { +; CHECK-LABEL: i32_to_2xi16_lh( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_lh_param_0]; +; CHECK-NEXT: shr.u32 %r2, %r1, 16; +; CHECK-NEXT: st.param.b16 [func_retval0], %r1; +; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2; +; CHECK-NEXT: ret; %high32 = lshr i32 %in, 16 %high = trunc i32 %high32 to i16 %low = trunc i32 %in to i16 -; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_lh_param_0]; -; CHECK-DAG: cvt.u16.u32 %rs{{[0-9+]}}, %[[R32]]; -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]]; %s1 = insertvalue %struct.S16 poison, i16 %low, 0 %s = insertvalue %struct.S16 %s1, i16 %high, 1 ret %struct.S16 %s } -; CHECK-LABEL: i32_to_2xi16_not( define %struct.S16 @i32_to_2xi16_not(i32 noundef %in) { +; CHECK-LABEL: i32_to_2xi16_not( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_not_param_0]; +; CHECK-NEXT: shr.u32 %r2, %r1, 15; +; CHECK-NEXT: st.param.b16 [func_retval0], %r1; +; CHECK-NEXT: st.param.b16 [func_retval0+2], %r2; +; CHECK-NEXT: ret; %low = trunc i32 %in to i16 ; Shift by any value other than 16 blocks the conversiopn to mov. %high32 = lshr i32 %in, 15 %high = trunc i32 %high32 to i16 -; CHECK: cvt.u16.u32 -; CHECK: shr.u32 -; CHECK: cvt.u16.u32 %s1 = insertvalue %struct.S16 poison, i16 %low, 0 %s = insertvalue %struct.S16 %s1, i16 %high, 1 ret %struct.S16 %s } -; CHECK-LABEL: i64_to_2xi32( define %struct.S32 @i64_to_2xi32(i64 noundef %in) { +; CHECK-LABEL: i64_to_2xi32( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_param_0]; +; CHECK-NEXT: shr.u64 %rd2, %rd1, 32; +; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2; +; CHECK-NEXT: ret; %low = trunc i64 %in to i32 %high64 = lshr i64 %in, 32 %high = trunc i64 %high64 to i32 -; CHECK: ld.param.b64 %[[R64:rd[0-9]+]], [i64_to_2xi32_param_0]; -; CHECK-DAG: cvt.u32.u64 %r{{[0-9+]}}, %[[R64]]; -; CHECK-DAG mov.b64 {tmp, %r{{[0-9+]}}}, %[[R64]]; %s1 = insertvalue %struct.S32 poison, i32 %low, 0 %s = insertvalue %struct.S32 %s1, i32 %high, 1 ret %struct.S32 %s } -; CHECK-LABEL: i64_to_2xi32_not( define %struct.S32 @i64_to_2xi32_not(i64 noundef %in) { +; CHECK-LABEL: i64_to_2xi32_not( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [i64_to_2xi32_not_param_0]; +; CHECK-NEXT: shr.u64 %rd2, %rd1, 31; +; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: st.param.b32 [func_retval0+4], %rd2; +; CHECK-NEXT: ret; %low = trunc i64 %in to i32 ; Shift by any value other than 32 blocks the conversiopn to mov. %high64 = lshr i64 %in, 31 %high = trunc i64 %high64 to i32 -; CHECK: cvt.u32.u64 -; CHECK: shr.u64 -; CHECK: cvt.u32.u64 %s1 = insertvalue %struct.S32 poison, i32 %low, 0 %s = insertvalue %struct.S32 %s1, i32 %high, 1 ret %struct.S32 %s } -; CHECK-LABEL: i32_to_2xi16_shr( ; Make sure we do not get confused when our input itself is [al]shr. define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){ +; CHECK-LABEL: i32_to_2xi16_shr( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [i32_to_2xi16_shr_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .b32 param0; +; CHECK-NEXT: st.param.b32 [param0], %r1; +; CHECK-NEXT: call.uni escape_int, (param0); +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: shr.s32 %r2, %r1, 16; +; CHECK-NEXT: shr.u32 %r3, %r2, 16; +; CHECK-NEXT: st.param.b16 [func_retval0], %r2; +; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3; +; CHECK-NEXT: ret; call void @escape_int(i32 %i); // Force %i to be loaded completely. %i1 = ashr i32 %i, 16 %l = trunc i32 %i1 to i16 %h32 = ashr i32 %i1, 16 %h = trunc i32 %h32 to i16 -; CHECK: ld.param.b32 %[[R32:r[0-9]+]], [i32_to_2xi16_shr_param_0]; -; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 16; -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32]]; -; CHECK-DAG mov.b32 {tmp, %rs{{[0-9+]}}}, %[[R32H]]; %s0 = insertvalue %struct.S16 poison, i16 %l, 0 %s1 = insertvalue %struct.S16 %s0, i16 %h, 1 ret %struct.S16 %s1 diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll index e1eeb66b5afc0..955befc624c71 100644 --- a/llvm/test/CodeGen/NVPTX/jump-table.ll +++ b/llvm/test/CodeGen/NVPTX/jump-table.ll @@ -69,7 +69,7 @@ define i32 @test2(i32 %tmp158) { ; CHECK-LABEL: test2( ; CHECK: { ; CHECK-NEXT: .reg .pred %p<6>; -; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: ld.param.b32 %r1, [test2_param_0]; @@ -96,36 +96,29 @@ define i32 @test2(i32 %tmp158) { ; CHECK-NEXT: $L__BB1_11; ; CHECK-NEXT: brx.idx %r2, $L_brx_0; ; CHECK-NEXT: $L__BB1_7: // %bb339 -; CHECK-NEXT: mov.b32 %r7, 12; -; CHECK-NEXT: st.param.b32 [func_retval0], %r7; +; CHECK-NEXT: st.param.b32 [func_retval0], 12; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_5: // %entry ; CHECK-NEXT: setp.eq.s32 %p3, %r1, 1024; ; CHECK-NEXT: @%p3 bra $L__BB1_3; ; CHECK-NEXT: bra.uni $L__BB1_6; ; CHECK-NEXT: $L__BB1_3: // %bb338 -; CHECK-NEXT: mov.b32 %r8, 11; -; CHECK-NEXT: st.param.b32 [func_retval0], %r8; +; CHECK-NEXT: st.param.b32 [func_retval0], 11; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_10: // %bb342 -; CHECK-NEXT: mov.b32 %r4, 15; -; CHECK-NEXT: st.param.b32 [func_retval0], %r4; +; CHECK-NEXT: st.param.b32 [func_retval0], 15; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_6: // %bb336 -; CHECK-NEXT: mov.b32 %r9, 10; -; CHECK-NEXT: st.param.b32 [func_retval0], %r9; +; CHECK-NEXT: st.param.b32 [func_retval0], 10; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_8: // %bb340 -; CHECK-NEXT: mov.b32 %r6, 13; -; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: st.param.b32 [func_retval0], 13; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_9: // %bb341 -; CHECK-NEXT: mov.b32 %r5, 14; -; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: st.param.b32 [func_retval0], 14; ; CHECK-NEXT: ret; ; CHECK-NEXT: $L__BB1_11: // %bb343 -; CHECK-NEXT: mov.b32 %r3, 18; -; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: st.param.b32 [func_retval0], 18; ; CHECK-NEXT: ret; entry: switch i32 %tmp158, label %bb336 [ diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll index b4a74c762f523..d5078f5f19af6 100644 --- a/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll +++ b/llvm/test/CodeGen/NVPTX/ldg-invariant-256.ll @@ -11,7 +11,7 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) { ; SM90-LABEL: ld_global_v32i8( ; SM90: { ; SM90-NEXT: .reg .b16 %rs<16>; -; SM90-NEXT: .reg .b32 %r<19>; +; SM90-NEXT: .reg .b32 %r<18>; ; SM90-NEXT: .reg .b64 %rd<2>; ; SM90-EMPTY: ; SM90-NEXT: // %bb.0: @@ -42,14 +42,13 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) { ; SM90-NEXT: add.s16 %rs14, %rs11, %rs12; ; SM90-NEXT: add.s16 %rs15, %rs13, %rs14; ; SM90-NEXT: cvt.u32.u16 %r17, %rs15; -; SM90-NEXT: and.b32 %r18, %r17, 255; -; SM90-NEXT: st.param.b32 [func_retval0], %r18; +; SM90-NEXT: st.param.b32 [func_retval0], %r17; ; SM90-NEXT: ret; ; ; SM100-LABEL: ld_global_v32i8( ; SM100: { ; SM100-NEXT: .reg .b16 %rs<16>; -; SM100-NEXT: .reg .b32 %r<19>; +; SM100-NEXT: .reg .b32 %r<18>; ; SM100-NEXT: .reg .b64 %rd<2>; ; SM100-EMPTY: ; SM100-NEXT: // %bb.0: @@ -79,8 +78,7 @@ define i8 @ld_global_v32i8(ptr addrspace(1) %ptr) { ; SM100-NEXT: add.s16 %rs14, %rs11, %rs12; ; SM100-NEXT: add.s16 %rs15, %rs13, %rs14; ; SM100-NEXT: cvt.u32.u16 %r17, %rs15; -; SM100-NEXT: and.b32 %r18, %r17, 255; -; SM100-NEXT: st.param.b32 [func_retval0], %r18; +; SM100-NEXT: st.param.b32 [func_retval0], %r17; ; SM100-NEXT: ret; %a = load <32 x i8>, ptr addrspace(1) %ptr, !invariant.load !0 %v1 = extractelement <32 x i8> %a, i32 0 diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll index 06143debb6838..3bd46000661ce 100644 --- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll +++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll @@ -128,7 +128,7 @@ define i8 @ld_global_v8i8(ptr addrspace(1) %ptr) { ; CHECK-LABEL: ld_global_v8i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<8>; -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .b32 %r<8>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: @@ -146,8 +146,7 @@ define i8 @ld_global_v8i8(ptr addrspace(1) %ptr) { ; CHECK-NEXT: add.s16 %rs6, %rs2, %rs1; ; CHECK-NEXT: add.s16 %rs7, %rs5, %rs6; ; CHECK-NEXT: cvt.u32.u16 %r7, %rs7; -; CHECK-NEXT: and.b32 %r8, %r7, 255; -; CHECK-NEXT: st.param.b32 [func_retval0], %r8; +; CHECK-NEXT: st.param.b32 [func_retval0], %r7; ; CHECK-NEXT: ret; %a = load <8 x i8>, ptr addrspace(1) %ptr, !invariant.load !0 %v1 = extractelement <8 x i8> %a, i32 0 @@ -164,7 +163,7 @@ define i8 @ld_global_v16i8(ptr addrspace(1) %ptr) { ; CHECK-LABEL: ld_global_v16i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<16>; -; CHECK-NEXT: .reg .b32 %r<15>; +; CHECK-NEXT: .reg .b32 %r<14>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: @@ -194,8 +193,7 @@ define i8 @ld_global_v16i8(ptr addrspace(1) %ptr) { ; CHECK-NEXT: add.s16 %rs14, %rs11, %rs12; ; CHECK-NEXT: add.s16 %rs15, %rs13, %rs14; ; CHECK-NEXT: cvt.u32.u16 %r13, %rs15; -; CHECK-NEXT: and.b32 %r14, %r13, 255; -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; +; CHECK-NEXT: st.param.b32 [func_retval0], %r13; ; CHECK-NEXT: ret; %a = load <16 x i8>, ptr addrspace(1) %ptr, !invariant.load !0 %v1 = extractelement <16 x i8> %a, i32 0 diff --git a/llvm/test/CodeGen/NVPTX/ldu-i8.ll b/llvm/test/CodeGen/NVPTX/ldu-i8.ll index 89f23f30f34e8..5a0a66b8b93e2 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-i8.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-i8.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %} @@ -6,10 +7,18 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 declare i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr, i32) define i8 @foo(ptr %a) { -; Ensure we properly truncate off the high-order 24 bits -; CHECK: ldu.global.b8 -; CHECK: cvt.u32.u16 -; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 255 +; CHECK-LABEL: foo( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [foo_param_0]; +; CHECK-NEXT: ldu.global.b8 %rs1, [%rd1]; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0(ptr %a, i32 4) ret i8 %val } diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll index 7f4b049af84fb..977e7353f58e8 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -27,15 +27,14 @@ define i8 @test_ldu_i8(ptr addrspace(1) %ptr) { ; CHECK-LABEL: test_ldu_i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_ldu_i8_param_0]; ; CHECK-NEXT: ldu.global.b8 %rs1, [%rd1]; ; CHECK-NEXT: cvt.u32.u16 %r1, %rs1; -; CHECK-NEXT: and.b32 %r2, %r1, 255; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) ret i8 %val diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll index ff04e18701a84..a7f3103e5fcbb 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll @@ -1,26 +1,18 @@ -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 -O0 | FileCheck %s --check-prefix=SM_52 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx64 -O0 | FileCheck %s --check-prefix=SM_70 -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx72 -O0 | FileCheck %s --check-prefix=SM_90 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 -O0 | FileCheck %s --check-prefixes=SM_52,COMMON +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx64 -O0 | FileCheck %s --check-prefixes=SM_70,COMMON +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 -mattr=+ptx72 -O0 | FileCheck %s --check-prefixes=SM_90,COMMON @.str = private unnamed_addr constant [12 x i8] c"__CUDA_ARCH\00" @.str1 = constant [11 x i8] c"__CUDA_FTZ\00" declare i32 @__nvvm_reflect(ptr) -; SM_52: .visible .func (.param .b32 func_retval0) foo() -; SM_52: mov.b32 %[[REG:.+]], 3; -; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_52-NEXT: ret; -; -; SM_70: .visible .func (.param .b32 func_retval0) foo() -; SM_70: mov.b32 %[[REG:.+]], 2; -; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_70-NEXT: ret; -; -; SM_90: .visible .func (.param .b32 func_retval0) foo() -; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_90-NEXT: ret; +; COMMON-LABEL: .visible .func (.param .b32 func_retval0) foo() +; SM_52: st.param.b32 [func_retval0], 3; +; SM_70: st.param.b32 [func_retval0], 2; +; SM_90: st.param.b32 [func_retval0], 1; +; COMMON-NEXT: ret; + define i32 @foo() { entry: %call = call i32 @__nvvm_reflect(ptr @.str) @@ -54,20 +46,11 @@ return: ret i32 %retval.0 } -; SM_52: .visible .func (.param .b32 func_retval0) bar() -; SM_52: mov.b32 %[[REG:.+]], 2; -; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_52-NEXT: ret; -; -; SM_70: .visible .func (.param .b32 func_retval0) bar() -; SM_70: mov.b32 %[[REG:.+]], 1; -; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_70-NEXT: ret; -; -; SM_90: .visible .func (.param .b32 func_retval0) bar() -; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_90-NEXT: ret; +; COMMON-LABEL: .visible .func (.param .b32 func_retval0) bar() +; SM_52: st.param.b32 [func_retval0], 2; +; SM_70: st.param.b32 [func_retval0], 1; +; SM_90: st.param.b32 [func_retval0], 1; +; COMMON-NEXT: ret; define i32 @bar() { entry: %call = call i32 @__nvvm_reflect(ptr @.str) @@ -102,20 +85,11 @@ if.end: ret void } -; SM_52: .visible .func (.param .b32 func_retval0) qux() -; SM_52: mov.b32 %[[REG:.+]], 3; -; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_52-NEXT: ret; -; -; SM_70: .visible .func (.param .b32 func_retval0) qux() -; SM_70: mov.b32 %[[REG:.+]], 2; -; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_70-NEXT: ret; -; -; SM_90: .visible .func (.param .b32 func_retval0) qux() -; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_90-NEXT: ret; +; SM_52: .visible .func (.param .b32 func_retval0) qux() +; SM_52: st.param.b32 [func_retval0], 3; +; SM_70: st.param.b32 [func_retval0], 2; +; SM_90: st.param.b32 [func_retval0], 1; +; COMMON-NEXT: ret; define i32 @qux() { entry: %call = call i32 @__nvvm_reflect(ptr noundef @.str) @@ -142,18 +116,9 @@ return: ret i32 %retval } -; SM_52: .visible .func (.param .b32 func_retval0) phi() -; SM_52: mov.b32 %[[REG:.+]], 0f00000000; -; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG]]; -; SM_52-NEXT: ret; -; SM_70: .visible .func (.param .b32 func_retval0) phi() -; SM_70: mov.b32 %[[REG:.+]], 0f00000000; -; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG]]; -; SM_70-NEXT: ret; -; SM_90: .visible .func (.param .b32 func_retval0) phi() -; SM_90: mov.b32 %[[REG:.+]], 0f00000000; -; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG]]; -; SM_90-NEXT: ret; +; COMMON-LABEL: .visible .func (.param .b32 func_retval0) phi() +; COMMON: st.param.b32 [func_retval0], 0; +; COMMON-NEXT: ret; define float @phi() { entry: %0 = call i32 @__nvvm_reflect(ptr @.str) @@ -175,20 +140,11 @@ exit: ret float 0.000000e+00 } -; SM_52: .visible .func (.param .b32 func_retval0) prop() -; SM_52: mov.b32 %[[REG:.+]], 3; -; SM_52-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_52-NEXT: ret; -; -; SM_70: .visible .func (.param .b32 func_retval0) prop() -; SM_70: mov.b32 %[[REG:.+]], 2; -; SM_70-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_70-NEXT: ret; -; -; SM_90: .visible .func (.param .b32 func_retval0) prop() -; SM_90: mov.b32 %[[REG:.+]], 1; -; SM_90-NEXT: st.param.b32 [func_retval0], %[[REG:.+]]; -; SM_90-NEXT: ret; +; COMMON-LABEL: .visible .func (.param .b32 func_retval0) prop() +; SM_52: st.param.b32 [func_retval0], 3; +; SM_70: st.param.b32 [func_retval0], 2; +; SM_90: st.param.b32 [func_retval0], 1; +; COMMON-NEXT: ret; define i32 @prop() { entry: %call = call i32 @__nvvm_reflect(ptr @.str) diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll index cd2664e913824..4fa1235633cf6 100644 --- a/llvm/test/CodeGen/NVPTX/param-add.ll +++ b/llvm/test/CodeGen/NVPTX/param-add.ll @@ -14,33 +14,24 @@ declare i32 @callee(%struct.1float %a) define i32 @test(%struct.1float alignstack(32) %data) { ; CHECK-LABEL: test( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<16>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %r1, [test_param_0+1]; -; CHECK-NEXT: shl.b32 %r2, %r1, 8; -; CHECK-NEXT: ld.param.b8 %r3, [test_param_0]; -; CHECK-NEXT: or.b32 %r4, %r2, %r3; -; CHECK-NEXT: ld.param.b8 %r5, [test_param_0+3]; -; CHECK-NEXT: shl.b32 %r6, %r5, 8; -; CHECK-NEXT: ld.param.b8 %r7, [test_param_0+2]; -; CHECK-NEXT: or.b32 %r8, %r6, %r7; -; CHECK-NEXT: shl.b32 %r9, %r8, 16; -; CHECK-NEXT: or.b32 %r10, %r9, %r4; -; CHECK-NEXT: shr.u32 %r11, %r10, 8; -; CHECK-NEXT: shr.u32 %r12, %r10, 16; -; CHECK-NEXT: shr.u32 %r13, %r10, 24; +; CHECK-NEXT: ld.param.b32 %r1, [test_param_0]; +; CHECK-NEXT: shr.u32 %r2, %r1, 8; +; CHECK-NEXT: shr.u32 %r3, %r1, 16; +; CHECK-NEXT: shr.u32 %r4, %r1, 24; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 1 .b8 param0[4]; -; CHECK-NEXT: st.param.b8 [param0], %r10; -; CHECK-NEXT: st.param.b8 [param0+1], %r11; -; CHECK-NEXT: st.param.b8 [param0+2], %r12; -; CHECK-NEXT: st.param.b8 [param0+3], %r13; +; CHECK-NEXT: st.param.b8 [param0], %r1; +; CHECK-NEXT: st.param.b8 [param0+1], %r2; +; CHECK-NEXT: st.param.b8 [param0+2], %r3; +; CHECK-NEXT: st.param.b8 [param0+3], %r4; ; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: call.uni (retval0), callee, (param0); -; CHECK-NEXT: ld.param.b32 %r14, [retval0]; +; CHECK-NEXT: ld.param.b32 %r5, [retval0]; ; CHECK-NEXT: } // callseq 0 -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %1 = call i32 @callee(%struct.1float %data) diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 263477df1dbfe..6c52bfd6cbfd8 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -28,14 +28,12 @@ ; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1; ; CHECK: setp.ne.b16 %p1, [[A]], 0 ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]] -; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[C]] +; CHECK: st.param.b32 [param0], [[B]] ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), test_i1, ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0]; -; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1; -; CHECK: st.param.b32 [func_retval0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R8]]; ; CHECK: ret; define i1 @test_i1(i1 %a) { %r = tail call i1 @test_i1(i1 %a); @@ -166,14 +164,12 @@ define i3 @test_i3(i3 %a) { ; CHECK-NEXT: .param .b32 test_i8_param_0 ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0]; ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; -; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[A]]; +; CHECK: st.param.b32 [param0], [[A32]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), test_i8, ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; -; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255; -; CHECK: st.param.b32 [func_retval0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[R32]]; ; CHECK-NEXT: ret; define i8 @test_i8(i8 %a) { %r = tail call i8 @test_i8(i8 %a); @@ -247,7 +243,14 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) { ; CHECK: call.uni (retval0), test_v5i8, ; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; -; CHECK-DAG: st.param.v4.b8 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]]; +; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]]; +; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U; +; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]]; +; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]]; +; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U; +; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U; +; CHECK-DAG: st.param.b32 [func_retval0], [[P2]]; ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i8> @test_v5i8(<5 x i8> %a) { @@ -280,8 +283,7 @@ define i11 @test_i11(i11 %a) { ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), test_i16, ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; -; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535; -; CHECK: st.param.b32 [func_retval0], [[R]]; +; CHECK: st.param.b32 [func_retval0], [[RE32]]; ; CHECK-NEXT: ret; define i16 @test_i16(i16 %a) { %r = tail call i16 @test_i16(i16 %a); diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll index f490c5f73d425..88ad0b0a9f9d1 100644 --- a/llvm/test/CodeGen/NVPTX/param-overalign.ll +++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll @@ -106,10 +106,10 @@ define alignstack(8) %struct.float2 @aligned_return(%struct.float2 %a ) { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0+4]; -; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; -; CHECK-NEXT: st.param.b32 [func_retval0+4], %r1; +; CHECK-NEXT: ld.param.b32 %r1, [aligned_return_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [aligned_return_param_0+4]; +; CHECK-NEXT: st.param.b32 [func_retval0+4], %r2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; ; CHECK-NEXT: ret; ret %struct.float2 %a } diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll index 892e49a5fe82a..a480984a538b3 100644 --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -173,8 +173,8 @@ define internal fastcc [3 x i32] @callee_St4x3(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x3_param_0[12] ; CHECK: ld.param.v2.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x3_param_0]; ; CHECK: ld.param.b32 [[R3:%r[0-9]+]], [callee_St4x3_param_0+8]; - ; CHECK: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]}; - ; CHECK: st.param.b32 [func_retval0+8], [[R3]]; + ; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]}; + ; CHECK-DAG: st.param.b32 [func_retval0+8], [[R3]]; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1 @@ -269,8 +269,8 @@ define internal fastcc [5 x i32] @callee_St4x5(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x5_param_0[20] ; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x5_param_0]; ; CHECK: ld.param.b32 [[R5:%r[0-9]+]], [callee_St4x5_param_0+16]; - ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; - ; CHECK: st.param.b32 [func_retval0+16], [[R5]]; + ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK-DAG: st.param.b32 [func_retval0+16], [[R5]]; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1 @@ -328,8 +328,8 @@ define internal fastcc [6 x i32] @callee_St4x6(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x6_param_0[24] ; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x6_param_0]; ; CHECK: ld.param.v2.b32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x6_param_0+16]; - ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; - ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; + ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK-DAG: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1 @@ -396,9 +396,9 @@ define internal fastcc [7 x i32] @callee_St4x7(ptr nocapture noundef readonly by ; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x7_param_0]; ; CHECK: ld.param.v2.b32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x7_param_0+16]; ; CHECK: ld.param.b32 [[R7:%r[0-9]+]], [callee_St4x7_param_0+24]; - ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; - ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; - ; CHECK: st.param.b32 [func_retval0+24], [[R7]]; + ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK-DAG: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; + ; CHECK-DAG: st.param.b32 [func_retval0+24], [[R7]]; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1 @@ -468,8 +468,8 @@ define internal fastcc [8 x i32] @callee_St4x8(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St4x8_param_0[32] ; CHECK: ld.param.v4.b32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x8_param_0]; ; CHECK: ld.param.v4.b32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]], [[R7:%r[0-9]+]], [[R8:%r[0-9]+]]}, [callee_St4x8_param_0+16]; - ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; - ; CHECK: st.param.v4.b32 [func_retval0+16], {[[R5]], [[R6]], [[R7]], [[R8]]}; + ; CHECK-DAG: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; + ; CHECK-DAG: st.param.v4.b32 [func_retval0+16], {[[R5]], [[R6]], [[R7]], [[R8]]}; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1 @@ -589,8 +589,8 @@ define internal fastcc [3 x i64] @callee_St8x3(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St8x3_param_0[24] ; CHECK: ld.param.v2.b64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x3_param_0]; ; CHECK: ld.param.b64 [[RD3:%rd[0-9]+]], [callee_St8x3_param_0+16]; - ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; - ; CHECK: st.param.b64 [func_retval0+16], [[RD3]]; + ; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; + ; CHECK-DAG: st.param.b64 [func_retval0+16], [[RD3]]; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1 @@ -636,8 +636,8 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by ; CHECK-NEXT: .param .align 16 .b8 callee_St8x4_param_0[32] ; CHECK: ld.param.v2.b64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x4_param_0]; ; CHECK: ld.param.v2.b64 {[[RD3:%rd[0-9]+]], [[RD4:%rd[0-9]+]]}, [callee_St8x4_param_0+16]; - ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; - ; CHECK: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]}; + ; CHECK-DAG: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; + ; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]}; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1 @@ -687,10 +687,10 @@ define external fastcc [4 x i32] @callee_St4x4_external(ptr nocapture noundef re ; CHECK: ld.param.b32 [[R2:%r[0-9]+]], [callee_St4x4_external_param_0+4]; ; CHECK: ld.param.b32 [[R3:%r[0-9]+]], [callee_St4x4_external_param_0+8]; ; CHECK: ld.param.b32 [[R4:%r[0-9]+]], [callee_St4x4_external_param_0+12]; - ; CHECK: st.param.b32 [func_retval0], [[R1]]; - ; CHECK: st.param.b32 [func_retval0+4], [[R2]]; - ; CHECK: st.param.b32 [func_retval0+8], [[R3]]; - ; CHECK: st.param.b32 [func_retval0+12], [[R4]]; + ; CHECK-DAG: st.param.b32 [func_retval0], [[R1]]; + ; CHECK-DAG: st.param.b32 [func_retval0+4], [[R2]]; + ; CHECK-DAG: st.param.b32 [func_retval0+8], [[R3]]; + ; CHECK-DAG: st.param.b32 [func_retval0+12], [[R4]]; ; CHECK-NEXT: ret; %1 = load i32, ptr %in, align 4 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll index 3649ef53b0881..74023c4f5631a 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ -15,11 +15,9 @@ define i1 @check_i1() { ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 1; - ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 1; - - ; PTX-DAG: st.param.b32 [func_retval0], [[RES]]; + ; PTX-WITHOUT-DAG: mov.b32 [[RES:%r[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0], [[RES]]; + ; PTX-WITH-DAG: st.param.b32 [func_retval0], [[LD]]; %ret = call i1 @callee_i1() ret i1 %ret @@ -32,11 +30,9 @@ define i16 @check_i16() { ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; - ; PTX-WITHOUT-DAG: and.b32 [[RES:%r[0-9]+]], [[PROXY]], 65535; - ; PTX-WITH-DAG: and.b32 [[RES:%r[0-9]+]], [[LD]], 65535; - - ; PTX-DAG: st.param.b32 [func_retval0], [[RES]]; + ; PTX-WITHOUT-DAG: mov.b32 [[RES:%r[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0], [[RES]]; + ; PTX-WITH-DAG: st.param.b32 [func_retval0], [[LD]]; %ret = call i16 @callee_i16() ret i16 %ret diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir index a1d8d0590f160..5d0d6f6ecd5ff 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir @@ -83,16 +83,16 @@ body: | %5:b32 = ProxyRegB32 killed %1 %6:b32 = ProxyRegB32 killed %2 %7:b32 = ProxyRegB32 killed %3 - ; CHECK: StoreRetvalV4I32 killed %0, killed %1, killed %2, killed %3 - StoreRetvalV4I32 killed %4, killed %5, killed %6, killed %7, 0 + ; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3 + STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101) %8:b32 = LoadParamMemI32 0 ; CHECK-NOT: ProxyReg %9:b32 = ProxyRegB32 killed %8 %10:b32 = ProxyRegB32 killed %9 %11:b32 = ProxyRegB32 killed %10 - ; CHECK: StoreRetvalI32 killed %8 - StoreRetvalI32 killed %11, 0 + ; CHECK: ST_i32 killed %8 + ST_i32 killed %11, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101) Return ... diff --git a/llvm/test/CodeGen/NVPTX/shift-opt.ll b/llvm/test/CodeGen/NVPTX/shift-opt.ll index f0813609268e9..e7866b01064c7 100644 --- a/llvm/test/CodeGen/NVPTX/shift-opt.ll +++ b/llvm/test/CodeGen/NVPTX/shift-opt.ll @@ -70,16 +70,17 @@ define i64 @test_and(i64 %x, i32 %y) { define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) { ; CHECK-LABEL: test_vec( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<5>; +; CHECK-NEXT: .reg .b16 %rs<7>; ; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_vec_param_0]; -; CHECK-NEXT: ld.param.b32 %r1, [test_vec_param_1]; +; CHECK-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [test_vec_param_1]; +; CHECK-NEXT: mov.b32 %r1, {%rs3, %rs4}; ; CHECK-NEXT: and.b32 %r2, %r1, 16711935; -; CHECK-NEXT: shr.u16 %rs3, %rs2, 5; -; CHECK-NEXT: shr.u16 %rs4, %rs1, 5; -; CHECK-NEXT: mov.b32 %r3, {%rs4, %rs3}; +; CHECK-NEXT: shr.u16 %rs5, %rs2, 5; +; CHECK-NEXT: shr.u16 %rs6, %rs1, 5; +; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5}; ; CHECK-NEXT: or.b32 %r4, %r3, %r2; ; CHECK-NEXT: st.param.b32 [func_retval0], %r4; ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/tid-range.ll b/llvm/test/CodeGen/NVPTX/tid-range.ll index 27bc19bfd6c4a..019814e47c2b1 100644 --- a/llvm/test/CodeGen/NVPTX/tid-range.ll +++ b/llvm/test/CodeGen/NVPTX/tid-range.ll @@ -1,20 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %} declare i32 @get_register() define i1 @test1() { +; CHECK-LABEL: test1( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mov.u32 %r1, %tid.x; +; CHECK-NEXT: setp.eq.s32 %p1, %r1, 1; +; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; entry: %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !0 %cmp = icmp eq i32 %call, 1 ret i1 %cmp } -; CHECK-LABEL: test1( -; CHECK: setp.eq.s32 %p1, %r1, 1; -; CHECK: selp.b32 %[[R:.+]], 1, 0, %p1; -; CHECK: st.param.b32 [func_retval0], %[[R]]; - declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() !0 = !{ i32 0, i32 3 } diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll index 178ee7ff6db18..e4e668018d872 100644 --- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; Verifies correctness of load/store of parameters and return values. ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %} @@ -19,339 +20,489 @@ ; -- Only loading and storing the said fields are checked in the following ; series of tests so that they are more concise. -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[16]) -; CHECK-LABEL: test_s_i8i16p( -; CHECK: .param .align 8 .b8 test_s_i8i16p_param_0[16] -; CHECK-DAG: ld.param.b16 [[P0:%rs[0-9]+]], [test_s_i8i16p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%rs[0-9]+]], [test_s_i8i16p_param_0+3]; -; CHECK-DAG: ld.param.b8 [[P2_1:%rs[0-9]+]], [test_s_i8i16p_param_0+4]; -; CHECK-DAG: shl.b16 [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK: { // callseq -; CHECK: .param .align 8 .b8 param0[16]; -; CHECK-DAG: st.param.b16 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]]; -; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]]; -; CHECK: .param .align 8 .b8 retval0[16]; -; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0); -; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+3]; -; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+4]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b16 [func_retval0], [[R0]]; -; CHECK-DAG: shl.b16 [[R2_1_shl:%rs[0-9]+]], [[R2_1]], 8; -; CHECK-DAG: and.b16 [[R2_0_and:%rs[0-9]+]], [[R2_0]], 255; -; CHECK-DAG: or.b16 [[R2:%rs[0-9]+]], [[R2_0_and]], [[R2_1_shl]]; -; CHECK-DAG: st.param.b8 [func_retval0+3], [[R2]]; -; CHECK-DAG: and.b16 [[R2_1_and:%rs[0-9]+]], [[R2_1]], 255; -; CHECK-DAG: st.param.b8 [func_retval0+4], [[R2_1_and]]; -; CHECK: ret; define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { - %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) - ret %s_i8i16p %r +; CHECK-LABEL: test_s_i8i16p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<15>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4]; +; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; +; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3]; +; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8]; +; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2]; +; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[16]; +; CHECK-NEXT: st.param.b16 [param0], %rs1; +; CHECK-NEXT: st.param.b8 [param0+2], %rs2; +; CHECK-NEXT: st.param.b8 [param0+3], %rs3; +; CHECK-NEXT: st.param.b8 [param0+4], %rs4; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0); +; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; +; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; +; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; +; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; +; CHECK-NEXT: ret; + %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) + ret %s_i8i16p %r } -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24]) -; CHECK-LABEL: test_s_i8i32p( -; CHECK: .param .align 8 .b8 test_s_i8i32p_param_0[24] -; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8i32p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%r[0-9]+]], [test_s_i8i32p_param_0+5]; -; CHECK-DAG: ld.param.b8 [[P2_1:%r[0-9]+]], [test_s_i8i32p_param_0+6]; -; CHECK-DAG: ld.param.b8 [[P2_2:%r[0-9]+]], [test_s_i8i32p_param_0+7]; -; CHECK-DAG: ld.param.b8 [[P2_3:%r[0-9]+]], [test_s_i8i32p_param_0+8]; -; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16; -; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24; -; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]]; -; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]]; -; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8; -; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16; -; CHECK: { // callseq -; CHECK-DAG: .param .align 8 .b8 param0[24]; -; CHECK-DAG: st.param.b32 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+5], [[P2]]; -; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]]; -; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]]; -; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]]; -; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0); -; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5]; -; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6]; -; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7]; -; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b32 [func_retval0], [[R0]]; -; CHECK-DAG: st.param.b8 [func_retval0+5], -; CHECK-DAG: st.param.b8 [func_retval0+6], -; CHECK-DAG: st.param.b8 [func_retval0+7], -; CHECK-DAG: st.param.b8 [func_retval0+8], -; CHECK: ret; define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { - %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) - ret %s_i8i32p %r +; CHECK-LABEL: test_s_i8i32p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<12>; +; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6]; +; CHECK-NEXT: shl.b32 %r4, %r3, 8; +; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5]; +; CHECK-NEXT: or.b32 %r6, %r4, %r5; +; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7]; +; CHECK-NEXT: shl.b32 %r8, %r7, 16; +; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8]; +; CHECK-NEXT: shl.b32 %r10, %r9, 24; +; CHECK-NEXT: or.b32 %r11, %r10, %r8; +; CHECK-NEXT: or.b32 %r2, %r11, %r6; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4]; +; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0]; +; CHECK-NEXT: shr.u32 %r12, %r2, 8; +; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[24]; +; CHECK-NEXT: st.param.b32 [param0], %r1; +; CHECK-NEXT: st.param.b8 [param0+4], %rs1; +; CHECK-NEXT: st.param.b8 [param0+5], %r2; +; CHECK-NEXT: st.param.b8 [param0+6], %r12; +; CHECK-NEXT: st.param.b8 [param0+7], %r13; +; CHECK-NEXT: st.param.b8 [param0+8], %r9; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0); +; CHECK-NEXT: ld.param.b32 %r14, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: cvt.u32.u16 %r16, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r17, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r18, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r19, %rs6; +; CHECK-NEXT: st.param.b32 [func_retval0], %r14; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16; +; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: ret; + %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) + ret %s_i8i32p %r } -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[32]) -; CHECK-LABEL: test_s_i8i64p( -; CHECK: .param .align 8 .b8 test_s_i8i64p_param_0[32] -; CHECK-DAG: ld.param.b64 [[P0:%rd[0-9]+]], [test_s_i8i64p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%rd[0-9]+]], [test_s_i8i64p_param_0+9]; -; CHECK-DAG: ld.param.b8 [[P2_1:%rd[0-9]+]], [test_s_i8i64p_param_0+10]; -; CHECK-DAG: ld.param.b8 [[P2_2:%rd[0-9]+]], [test_s_i8i64p_param_0+11]; -; CHECK-DAG: ld.param.b8 [[P2_3:%rd[0-9]+]], [test_s_i8i64p_param_0+12]; -; CHECK-DAG: ld.param.b8 [[P2_4:%rd[0-9]+]], [test_s_i8i64p_param_0+13]; -; CHECK-DAG: ld.param.b8 [[P2_5:%rd[0-9]+]], [test_s_i8i64p_param_0+14]; -; CHECK-DAG: ld.param.b8 [[P2_6:%rd[0-9]+]], [test_s_i8i64p_param_0+15]; -; CHECK-DAG: ld.param.b8 [[P2_7:%rd[0-9]+]], [test_s_i8i64p_param_0+16]; -; CHECK-DAG: shl.b64 [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: shl.b64 [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16; -; CHECK-DAG: shl.b64 [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24; -; CHECK-DAG: or.b64 [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK-DAG: or.b64 [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]]; -; CHECK-DAG: or.b64 [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]]; -; CHECK-DAG: shl.b64 [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8; -; CHECK-DAG: shl.b64 [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16; -; CHECK-DAG: shl.b64 [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24; -; CHECK-DAG: or.b64 [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]]; -; CHECK-DAG: or.b64 [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]]; -; CHECK-DAG: or.b64 [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]]; -; CHECK-DAG: shl.b64 [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32; -; CHECK-DAG: or.b64 [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]]; -; CHECK-DAG: shr.u64 [[P2_shr_1:%rd[0-9]+]], [[P2]], 8; -; CHECK-DAG: shr.u64 [[P2_shr_2:%rd[0-9]+]], [[P2]], 16; -; CHECK-DAG: shr.u64 [[P2_shr_3:%rd[0-9]+]], [[P2]], 24; -; CHECK-DAG: bfe.u64 [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24; -; CHECK-DAG: bfe.u64 [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16; -; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8; -; CHECK: { // callseq -; CHECK: .param .align 8 .b8 param0[32]; -; CHECK-DAG: st.param.b64 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+9], [[P2]]; -; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]]; -; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]]; -; CHECK-DAG: st.param.b8 [param0+12], [[P2_shr_3]]; -; CHECK-DAG: st.param.b8 [param0+13], [[P2_or_5]]; -; CHECK-DAG: st.param.b8 [param0+14], [[P2_bfe_4]]; -; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]]; -; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]]; -; CHECK: .param .align 8 .b8 retval0[32]; -; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0); -; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9]; -; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10]; -; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11]; -; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+12]; -; CHECK-DAG: ld.param.b8 [[R2_4:%rs[0-9]+]], [retval0+13]; -; CHECK-DAG: ld.param.b8 [[R2_5:%rs[0-9]+]], [retval0+14]; -; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15]; -; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b64 [func_retval0], [[R0]]; -; CHECK-DAG: st.param.b8 [func_retval0+9], -; CHECK-DAG: st.param.b8 [func_retval0+10], -; CHECK-DAG: st.param.b8 [func_retval0+11], -; CHECK-DAG: st.param.b8 [func_retval0+12], -; CHECK-DAG: st.param.b8 [func_retval0+13], -; CHECK-DAG: st.param.b8 [func_retval0+14], -; CHECK-DAG: st.param.b8 [func_retval0+15], -; CHECK-DAG: st.param.b8 [func_retval0+16], -; CHECK: ret; define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) { - %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) - ret %s_i8i64p %r +; CHECK-LABEL: test_s_i8i64p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<20>; +; CHECK-NEXT: .reg .b64 %rd<68>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10]; +; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; +; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9]; +; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; +; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11]; +; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; +; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12]; +; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; +; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; +; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; +; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14]; +; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; +; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13]; +; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; +; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15]; +; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; +; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16]; +; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; +; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; +; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; +; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; +; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; +; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0]; +; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; +; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; +; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; +; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; +; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; +; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[32]; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: st.param.b8 [param0+8], %rs1; +; CHECK-NEXT: st.param.b8 [param0+9], %rd2; +; CHECK-NEXT: st.param.b8 [param0+10], %rd25; +; CHECK-NEXT: st.param.b8 [param0+11], %rd26; +; CHECK-NEXT: st.param.b8 [param0+12], %rd27; +; CHECK-NEXT: st.param.b8 [param0+13], %rd23; +; CHECK-NEXT: st.param.b8 [param0+14], %rd28; +; CHECK-NEXT: st.param.b8 [param0+15], %rd29; +; CHECK-NEXT: st.param.b8 [param0+16], %rd30; +; CHECK-NEXT: st.param.b64 [param0+24], %rd3; +; CHECK-NEXT: .param .align 8 .b8 retval0[32]; +; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0); +; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; +; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; +; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; +; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; +; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; +; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: cvt.u64.u16 %rd35, %rs3; +; CHECK-NEXT: and.b64 %rd36, %rd35, 255; +; CHECK-NEXT: cvt.u64.u16 %rd37, %rs4; +; CHECK-NEXT: and.b64 %rd38, %rd37, 255; +; CHECK-NEXT: shl.b64 %rd39, %rd38, 8; +; CHECK-NEXT: or.b64 %rd40, %rd36, %rd39; +; CHECK-NEXT: cvt.u64.u16 %rd41, %rs5; +; CHECK-NEXT: and.b64 %rd42, %rd41, 255; +; CHECK-NEXT: shl.b64 %rd43, %rd42, 16; +; CHECK-NEXT: or.b64 %rd44, %rd40, %rd43; +; CHECK-NEXT: cvt.u64.u16 %rd45, %rs6; +; CHECK-NEXT: and.b64 %rd46, %rd45, 255; +; CHECK-NEXT: shl.b64 %rd47, %rd46, 24; +; CHECK-NEXT: or.b64 %rd48, %rd44, %rd47; +; CHECK-NEXT: cvt.u64.u16 %rd49, %rs7; +; CHECK-NEXT: and.b64 %rd50, %rd49, 255; +; CHECK-NEXT: shl.b64 %rd51, %rd50, 32; +; CHECK-NEXT: or.b64 %rd52, %rd48, %rd51; +; CHECK-NEXT: cvt.u64.u16 %rd53, %rs8; +; CHECK-NEXT: and.b64 %rd54, %rd53, 255; +; CHECK-NEXT: shl.b64 %rd55, %rd54, 40; +; CHECK-NEXT: or.b64 %rd56, %rd52, %rd55; +; CHECK-NEXT: cvt.u64.u16 %rd57, %rs9; +; CHECK-NEXT: and.b64 %rd58, %rd57, 255; +; CHECK-NEXT: shl.b64 %rd59, %rd58, 48; +; CHECK-NEXT: or.b64 %rd60, %rd56, %rd59; +; CHECK-NEXT: cvt.u64.u16 %rd61, %rs10; +; CHECK-NEXT: shl.b64 %rd62, %rd61, 56; +; CHECK-NEXT: or.b64 %rd63, %rd60, %rd62; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; +; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd45; +; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd41; +; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd37; +; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd35; +; CHECK-NEXT: shr.u64 %rd64, %rd52, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; +; CHECK-NEXT: shr.u64 %rd65, %rd56, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; +; CHECK-NEXT: shr.u64 %rd66, %rd60, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; +; CHECK-NEXT: shr.u64 %rd67, %rd63, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67; +; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; +; CHECK-NEXT: ret; + %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) + ret %s_i8i64p %r } -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[16]) -; CHECK-LABEL: test_s_i8f16p( -; CHECK: .param .align 8 .b8 test_s_i8f16p_param_0[16] -; CHECK-DAG: ld.param.b16 [[P0:%rs[0-9]+]], [test_s_i8f16p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%rs[0-9]+]], [test_s_i8f16p_param_0+3]; -; CHECK-DAG: ld.param.b8 [[P2_1:%rs[0-9]+]], [test_s_i8f16p_param_0+4]; -; CHECK-DAG: shl.b16 [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: or.b16 [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK: { // callseq -; CHECK: .param .align 8 .b8 param0[16]; -; CHECK-DAG: st.param.b16 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+3], [[P2_1_or]]; -; CHECK-DAG: st.param.b8 [param0+4], [[P2_1]]; -; CHECK: .param .align 8 .b8 retval0[16]; -; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0); -; CHECK-DAG: ld.param.b16 [[R0:%rs[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2I_0:%rs[0-9]+]], [retval0+3]; -; CHECK-DAG: ld.param.b8 [[R2I_1:%rs[0-9]+]], [retval0+4]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b16 [func_retval0], [[R0]]; -; CHECK-DAG: shl.b16 [[R2I_1_shl:%rs[0-9]+]], [[R2I_1]], 8; -; CHECK-DAG: and.b16 [[R2I_0_and:%rs[0-9]+]], [[R2I_0]], 255; -; CHECK-DAG: or.b16 [[R2I:%rs[0-9]+]], [[R2I_0_and]], [[R2I_1_shl]]; -; CHECK-DAG: st.param.b8 [func_retval0+3], [[R2I]]; -; CHECK-DAG: and.b16 [[R2I_1_and:%rs[0-9]+]], [[R2I_1]], 255; -; CHECK-DAG: st.param.b8 [func_retval0+4], [[R2I_1_and]]; -; CHECK: ret; define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { - %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) - ret %s_i8f16p %r +; CHECK-LABEL: test_s_i8f16p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<15>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4]; +; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; +; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3]; +; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8]; +; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2]; +; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0]; +; CHECK-NEXT: { // callseq 3, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[16]; +; CHECK-NEXT: st.param.b16 [param0], %rs1; +; CHECK-NEXT: st.param.b8 [param0+2], %rs2; +; CHECK-NEXT: st.param.b8 [param0+3], %rs3; +; CHECK-NEXT: st.param.b8 [param0+4], %rs4; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0); +; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; +; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; +; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; +; CHECK-NEXT: } // callseq 3 +; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; +; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; +; CHECK-NEXT: ret; + %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) + ret %s_i8f16p %r } -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24]) -; CHECK-LABEL: test_s_i8f16x2p( -; CHECK: .param .align 8 .b8 test_s_i8f16x2p_param_0[24] -; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8f16x2p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%r[0-9]+]], [test_s_i8f16x2p_param_0+5]; -; CHECK-DAG: ld.param.b8 [[P2_1:%r[0-9]+]], [test_s_i8f16x2p_param_0+6]; -; CHECK-DAG: ld.param.b8 [[P2_2:%r[0-9]+]], [test_s_i8f16x2p_param_0+7]; -; CHECK-DAG: ld.param.b8 [[P2_3:%r[0-9]+]], [test_s_i8f16x2p_param_0+8]; -; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16; -; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24; -; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]]; -; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]]; -; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8; -; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16; -; CHECK: { // callseq -; CHECK-DAG: .param .align 8 .b8 param0[24]; -; CHECK-DAG: st.param.b32 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+5], [[P2]]; -; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]]; -; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]]; -; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]]; -; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0); -; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5]; -; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6]; -; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7]; -; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b32 [func_retval0], [[R0]]; -; CHECK-DAG: st.param.b8 [func_retval0+5], -; CHECK-DAG: st.param.b8 [func_retval0+6], -; CHECK-DAG: st.param.b8 [func_retval0+7], -; CHECK-DAG: st.param.b8 [func_retval0+8], -; CHECK: ret; define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) { - %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) - ret %s_i8f16x2p %r +; CHECK-LABEL: test_s_i8f16x2p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<12>; +; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6]; +; CHECK-NEXT: shl.b32 %r4, %r3, 8; +; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5]; +; CHECK-NEXT: or.b32 %r6, %r4, %r5; +; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7]; +; CHECK-NEXT: shl.b32 %r8, %r7, 16; +; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8]; +; CHECK-NEXT: shl.b32 %r10, %r9, 24; +; CHECK-NEXT: or.b32 %r11, %r10, %r8; +; CHECK-NEXT: or.b32 %r2, %r11, %r6; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4]; +; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0]; +; CHECK-NEXT: shr.u32 %r12, %r2, 8; +; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: { // callseq 4, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[24]; +; CHECK-NEXT: st.param.b32 [param0], %r1; +; CHECK-NEXT: st.param.b8 [param0+4], %rs1; +; CHECK-NEXT: st.param.b8 [param0+5], %r2; +; CHECK-NEXT: st.param.b8 [param0+6], %r12; +; CHECK-NEXT: st.param.b8 [param0+7], %r13; +; CHECK-NEXT: st.param.b8 [param0+8], %r9; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0); +; CHECK-NEXT: ld.param.b32 %r14, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: } // callseq 4 +; CHECK-NEXT: cvt.u32.u16 %r16, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r17, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r18, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r19, %rs6; +; CHECK-NEXT: st.param.b32 [func_retval0], %r14; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16; +; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: ret; + %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) + ret %s_i8f16x2p %r } -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[24]) -; CHECK-LABEL: test_s_i8f32p( -; CHECK: .param .align 8 .b8 test_s_i8f32p_param_0[24] -; CHECK-DAG: ld.param.b32 [[P0:%r[0-9]+]], [test_s_i8f32p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%r[0-9]+]], [test_s_i8f32p_param_0+5]; -; CHECK-DAG: ld.param.b8 [[P2_1:%r[0-9]+]], [test_s_i8f32p_param_0+6]; -; CHECK-DAG: ld.param.b8 [[P2_2:%r[0-9]+]], [test_s_i8f32p_param_0+7]; -; CHECK-DAG: ld.param.b8 [[P2_3:%r[0-9]+]], [test_s_i8f32p_param_0+8]; -; CHECK-DAG: shl.b32 [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: shl.b32 [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16; -; CHECK-DAG: shl.b32 [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24; -; CHECK-DAG: or.b32 [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK-DAG: or.b32 [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]]; -; CHECK-DAG: or.b32 [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]]; -; CHECK-DAG: shr.u32 [[P2_1_shr:%r[0-9]+]], [[P2]], 8; -; CHECK-DAG: shr.u32 [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16; -; CHECK: { // callseq -; CHECK-DAG: .param .align 8 .b8 param0[24]; -; CHECK-DAG: st.param.b32 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+5], [[P2]]; -; CHECK-DAG: st.param.b8 [param0+6], [[P2_1_shr]]; -; CHECK-DAG: st.param.b8 [param0+7], [[P2_2_shr]]; -; CHECK-DAG: st.param.b8 [param0+8], [[P2_3]]; -; CHECK: .param .align 8 .b8 retval0[24]; -; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0); -; CHECK-DAG: ld.param.b32 [[R0:%r[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+5]; -; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+6]; -; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+7]; -; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+8]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b32 [func_retval0], [[R0]]; -; CHECK-DAG: st.param.b8 [func_retval0+5], -; CHECK-DAG: st.param.b8 [func_retval0+6], -; CHECK-DAG: st.param.b8 [func_retval0+7], -; CHECK-DAG: st.param.b8 [func_retval0+8], -; CHECK: ret; define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) { - %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) - ret %s_i8f32p %r +; CHECK-LABEL: test_s_i8f32p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<12>; +; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6]; +; CHECK-NEXT: shl.b32 %r4, %r3, 8; +; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5]; +; CHECK-NEXT: or.b32 %r6, %r4, %r5; +; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7]; +; CHECK-NEXT: shl.b32 %r8, %r7, 16; +; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8]; +; CHECK-NEXT: shl.b32 %r10, %r9, 24; +; CHECK-NEXT: or.b32 %r11, %r10, %r8; +; CHECK-NEXT: or.b32 %r2, %r11, %r6; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4]; +; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0]; +; CHECK-NEXT: shr.u32 %r12, %r2, 8; +; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: { // callseq 5, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[24]; +; CHECK-NEXT: st.param.b32 [param0], %r1; +; CHECK-NEXT: st.param.b8 [param0+4], %rs1; +; CHECK-NEXT: st.param.b8 [param0+5], %r2; +; CHECK-NEXT: st.param.b8 [param0+6], %r12; +; CHECK-NEXT: st.param.b8 [param0+7], %r13; +; CHECK-NEXT: st.param.b8 [param0+8], %r9; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0); +; CHECK-NEXT: ld.param.b32 %r14, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: } // callseq 5 +; CHECK-NEXT: cvt.u32.u16 %r16, %rs3; +; CHECK-NEXT: cvt.u32.u16 %r17, %rs4; +; CHECK-NEXT: cvt.u32.u16 %r18, %rs5; +; CHECK-NEXT: cvt.u32.u16 %r19, %rs6; +; CHECK-NEXT: st.param.b32 [func_retval0], %r14; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r19; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r18; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r17; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r16; +; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: ret; + %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) + ret %s_i8f32p %r } -; CHECK: .visible .func (.param .align 8 .b8 func_retval0[32]) -; CHECK-LABEL: test_s_i8f64p( -; CHECK: .param .align 8 .b8 test_s_i8f64p_param_0[32] -; CHECK-DAG: ld.param.b64 [[P0:%rd[0-9]+]], [test_s_i8f64p_param_0]; -; CHECK-DAG: ld.param.b8 [[P2_0:%rd[0-9]+]], [test_s_i8f64p_param_0+9]; -; CHECK-DAG: ld.param.b8 [[P2_1:%rd[0-9]+]], [test_s_i8f64p_param_0+10]; -; CHECK-DAG: ld.param.b8 [[P2_2:%rd[0-9]+]], [test_s_i8f64p_param_0+11]; -; CHECK-DAG: ld.param.b8 [[P2_3:%rd[0-9]+]], [test_s_i8f64p_param_0+12]; -; CHECK-DAG: ld.param.b8 [[P2_4:%rd[0-9]+]], [test_s_i8f64p_param_0+13]; -; CHECK-DAG: ld.param.b8 [[P2_5:%rd[0-9]+]], [test_s_i8f64p_param_0+14]; -; CHECK-DAG: ld.param.b8 [[P2_6:%rd[0-9]+]], [test_s_i8f64p_param_0+15]; -; CHECK-DAG: ld.param.b8 [[P2_7:%rd[0-9]+]], [test_s_i8f64p_param_0+16]; -; CHECK-DAG: shl.b64 [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8; -; CHECK-DAG: shl.b64 [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16; -; CHECK-DAG: shl.b64 [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24; -; CHECK-DAG: or.b64 [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]]; -; CHECK-DAG: or.b64 [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]]; -; CHECK-DAG: or.b64 [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]]; -; CHECK-DAG: shl.b64 [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8; -; CHECK-DAG: shl.b64 [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16; -; CHECK-DAG: shl.b64 [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24; -; CHECK-DAG: or.b64 [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]]; -; CHECK-DAG: or.b64 [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]]; -; CHECK-DAG: or.b64 [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]]; -; CHECK-DAG: shl.b64 [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32; -; CHECK-DAG: or.b64 [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]]; -; CHECK-DAG: shr.u64 [[P2_shr_1:%rd[0-9]+]], [[P2]], 8; -; CHECK-DAG: shr.u64 [[P2_shr_2:%rd[0-9]+]], [[P2]], 16; -; CHECK-DAG: shr.u64 [[P2_shr_3:%rd[0-9]+]], [[P2]], 24; -; CHECK-DAG: bfe.u64 [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24; -; CHECK-DAG: bfe.u64 [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16; -; CHECK-DAG: bfe.u64 [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8; -; CHECK: { // callseq -; CHECK: .param .align 8 .b8 param0[32]; -; CHECK-DAG: st.param.b64 [param0], [[P0]]; -; CHECK-DAG: st.param.b8 [param0+9], [[P2]]; -; CHECK-DAG: st.param.b8 [param0+10], [[P2_shr_1]]; -; CHECK-DAG: st.param.b8 [param0+11], [[P2_shr_2]]; -; CHECK-DAG: st.param.b8 [param0+12], [[P2_shr_3]]; -; CHECK-DAG: st.param.b8 [param0+13], [[P2_or_5]]; -; CHECK-DAG: st.param.b8 [param0+14], [[P2_bfe_4]]; -; CHECK-DAG: st.param.b8 [param0+15], [[P2_bfe_5]]; -; CHECK-DAG: st.param.b8 [param0+16], [[P2_bfe_6]]; -; CHECK: .param .align 8 .b8 retval0[32]; -; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0); -; CHECK-DAG: ld.param.b64 [[R0:%rd[0-9]+]], [retval0]; -; CHECK-DAG: ld.param.b8 [[R2_0:%rs[0-9]+]], [retval0+9]; -; CHECK-DAG: ld.param.b8 [[R2_1:%rs[0-9]+]], [retval0+10]; -; CHECK-DAG: ld.param.b8 [[R2_2:%rs[0-9]+]], [retval0+11]; -; CHECK-DAG: ld.param.b8 [[R2_3:%rs[0-9]+]], [retval0+12]; -; CHECK-DAG: ld.param.b8 [[R2_4:%rs[0-9]+]], [retval0+13]; -; CHECK-DAG: ld.param.b8 [[R2_5:%rs[0-9]+]], [retval0+14]; -; CHECK-DAG: ld.param.b8 [[R2_6:%rs[0-9]+]], [retval0+15]; -; CHECK-DAG: ld.param.b8 [[R2_7:%rs[0-9]+]], [retval0+16]; -; CHECK: } // callseq -; CHECK-DAG: st.param.b64 [func_retval0], [[R0]]; -; CHECK-DAG: st.param.b8 [func_retval0+9], -; CHECK-DAG: st.param.b8 [func_retval0+10], -; CHECK-DAG: st.param.b8 [func_retval0+11], -; CHECK-DAG: st.param.b8 [func_retval0+12], -; CHECK-DAG: st.param.b8 [func_retval0+13], -; CHECK-DAG: st.param.b8 [func_retval0+14], -; CHECK-DAG: st.param.b8 [func_retval0+15], -; CHECK-DAG: st.param.b8 [func_retval0+16], -; CHECK: ret; define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) { - %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) - ret %s_i8f64p %r +; CHECK-LABEL: test_s_i8f64p( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<20>; +; CHECK-NEXT: .reg .b64 %rd<68>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10]; +; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; +; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9]; +; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; +; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11]; +; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; +; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12]; +; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; +; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; +; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; +; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14]; +; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; +; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13]; +; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; +; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15]; +; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; +; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16]; +; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; +; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; +; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; +; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; +; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; +; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0]; +; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; +; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; +; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; +; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; +; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; +; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; +; CHECK-NEXT: { // callseq 6, 0 +; CHECK-NEXT: .param .align 8 .b8 param0[32]; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: st.param.b8 [param0+8], %rs1; +; CHECK-NEXT: st.param.b8 [param0+9], %rd2; +; CHECK-NEXT: st.param.b8 [param0+10], %rd25; +; CHECK-NEXT: st.param.b8 [param0+11], %rd26; +; CHECK-NEXT: st.param.b8 [param0+12], %rd27; +; CHECK-NEXT: st.param.b8 [param0+13], %rd23; +; CHECK-NEXT: st.param.b8 [param0+14], %rd28; +; CHECK-NEXT: st.param.b8 [param0+15], %rd29; +; CHECK-NEXT: st.param.b8 [param0+16], %rd30; +; CHECK-NEXT: st.param.b64 [param0+24], %rd3; +; CHECK-NEXT: .param .align 8 .b8 retval0[32]; +; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0); +; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; +; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; +; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; +; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; +; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; +; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; +; CHECK-NEXT: } // callseq 6 +; CHECK-NEXT: cvt.u64.u16 %rd35, %rs3; +; CHECK-NEXT: and.b64 %rd36, %rd35, 255; +; CHECK-NEXT: cvt.u64.u16 %rd37, %rs4; +; CHECK-NEXT: and.b64 %rd38, %rd37, 255; +; CHECK-NEXT: shl.b64 %rd39, %rd38, 8; +; CHECK-NEXT: or.b64 %rd40, %rd36, %rd39; +; CHECK-NEXT: cvt.u64.u16 %rd41, %rs5; +; CHECK-NEXT: and.b64 %rd42, %rd41, 255; +; CHECK-NEXT: shl.b64 %rd43, %rd42, 16; +; CHECK-NEXT: or.b64 %rd44, %rd40, %rd43; +; CHECK-NEXT: cvt.u64.u16 %rd45, %rs6; +; CHECK-NEXT: and.b64 %rd46, %rd45, 255; +; CHECK-NEXT: shl.b64 %rd47, %rd46, 24; +; CHECK-NEXT: or.b64 %rd48, %rd44, %rd47; +; CHECK-NEXT: cvt.u64.u16 %rd49, %rs7; +; CHECK-NEXT: and.b64 %rd50, %rd49, 255; +; CHECK-NEXT: shl.b64 %rd51, %rd50, 32; +; CHECK-NEXT: or.b64 %rd52, %rd48, %rd51; +; CHECK-NEXT: cvt.u64.u16 %rd53, %rs8; +; CHECK-NEXT: and.b64 %rd54, %rd53, 255; +; CHECK-NEXT: shl.b64 %rd55, %rd54, 40; +; CHECK-NEXT: or.b64 %rd56, %rd52, %rd55; +; CHECK-NEXT: cvt.u64.u16 %rd57, %rs9; +; CHECK-NEXT: and.b64 %rd58, %rd57, 255; +; CHECK-NEXT: shl.b64 %rd59, %rd58, 48; +; CHECK-NEXT: or.b64 %rd60, %rd56, %rd59; +; CHECK-NEXT: cvt.u64.u16 %rd61, %rs10; +; CHECK-NEXT: shl.b64 %rd62, %rd61, 56; +; CHECK-NEXT: or.b64 %rd63, %rd60, %rd62; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; +; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd45; +; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd41; +; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd37; +; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd35; +; CHECK-NEXT: shr.u64 %rd64, %rd52, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; +; CHECK-NEXT: shr.u64 %rd65, %rd56, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; +; CHECK-NEXT: shr.u64 %rd66, %rd60, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; +; CHECK-NEXT: shr.u64 %rd67, %rd63, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67; +; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; +; CHECK-NEXT: ret; + %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) + ret %s_i8f64p %r } diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll index ab9202650577a..167d7faafe5b3 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll @@ -142,7 +142,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) { ; CHECK-PTX-NEXT: .reg .b64 %SP; ; CHECK-PTX-NEXT: .reg .b64 %SPL; ; CHECK-PTX-NEXT: .reg .b16 %rs<4>; -; CHECK-PTX-NEXT: .reg .b32 %r<7>; +; CHECK-PTX-NEXT: .reg .b32 %r<6>; ; CHECK-PTX-NEXT: .reg .b64 %rd<9>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry @@ -165,8 +165,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) { ; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3; ; CHECK-PTX-NEXT: cvt.u64.u32 %rd7, %r5; ; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd6; -; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8; -; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd8; ; CHECK-PTX-NEXT: ret; entry: %vlist = alloca ptr, align 8 @@ -310,7 +309,6 @@ entry: define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, ...) { ; CHECK-PTX-LABEL: variadics4( ; CHECK-PTX: { -; CHECK-PTX-NEXT: .reg .b32 %r<2>; ; CHECK-PTX-NEXT: .reg .b64 %rd<10>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry @@ -322,8 +320,7 @@ define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, . ; CHECK-PTX-NEXT: ld.param.b64 %rd7, [variadics4_param_0+8]; ; CHECK-PTX-NEXT: add.s64 %rd8, %rd6, %rd7; ; CHECK-PTX-NEXT: add.s64 %rd9, %rd8, %rd5; -; CHECK-PTX-NEXT: cvt.u32.u64 %r1, %rd9; -; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd9; ; CHECK-PTX-NEXT: ret; entry: %vlist = alloca ptr, align 8 diff --git a/llvm/test/CodeGen/NVPTX/vector-returns.ll b/llvm/test/CodeGen/NVPTX/vector-returns.ll index 2001d199ce0a7..472cdee11a4bd 100644 --- a/llvm/test/CodeGen/NVPTX/vector-returns.ll +++ b/llvm/test/CodeGen/NVPTX/vector-returns.ll @@ -6,12 +6,11 @@ target triple = "nvptx-nvidia-cuda" define <3 x i64> @long3() { ; CHECK-LABEL: long3( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b64 %rd1, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd1}; -; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0+16], 0; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <3 x i64> zeroinitializer } @@ -19,11 +18,10 @@ define <3 x i64> @long3() { define <2 x i64> @long2() { ; CHECK-LABEL: long2( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b64 %rd1, 0; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd1}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <2 x i64> zeroinitializer } @@ -31,11 +29,10 @@ define <2 x i64> @long2() { define <1 x i64> @long1() { ; CHECK-LABEL: long1( ; CHECK: { -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b64 %rd1, 0; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: st.param.b64 [func_retval0], 0; ; CHECK-NEXT: ret; ret <1 x i64> zeroinitializer } @@ -43,12 +40,11 @@ define <1 x i64> @long1() { define <5 x i32> @int5() { ; CHECK-LABEL: int5( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; -; CHECK-NEXT: st.param.b32 [func_retval0+16], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0+16], 0; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0}; ; CHECK-NEXT: ret; ret <5 x i32> zeroinitializer } @@ -56,11 +52,10 @@ define <5 x i32> @int5() { define <4 x i32> @int4() { ; CHECK-LABEL: int4( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0}; ; CHECK-NEXT: ret; ret <4 x i32> zeroinitializer } @@ -68,12 +63,11 @@ define <4 x i32> @int4() { define <3 x i32> @int3() { ; CHECK-LABEL: int3( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; -; CHECK-NEXT: st.param.b32 [func_retval0+8], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0+8], 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <3 x i32> zeroinitializer } @@ -81,11 +75,10 @@ define <3 x i32> @int3() { define <2 x i32> @int2() { ; CHECK-LABEL: int2( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <2 x i32> zeroinitializer } @@ -93,11 +86,10 @@ define <2 x i32> @int2() { define <1 x i32> @int1() { ; CHECK-LABEL: int1( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <1 x i32> zeroinitializer } @@ -105,13 +97,12 @@ define <1 x i32> @int1() { define <9 x i16> @short9() { ; CHECK-LABEL: short9( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b16 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.b16 [func_retval0+16], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0+16], 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0+8], {0, 0}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <9 x i16> zeroinitializer } @@ -119,11 +110,10 @@ define <9 x i16> @short9() { define <8 x i16> @short8() { ; CHECK-LABEL: short8( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0}; ; CHECK-NEXT: ret; ret <8 x i16> zeroinitializer } @@ -131,13 +121,12 @@ define <8 x i16> @short8() { define <7 x i16> @short7() { ; CHECK-LABEL: short7( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b16 [func_retval0+8], {%rs1, %rs1}; -; CHECK-NEXT: st.param.b16 [func_retval0+12], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0+12], 0; +; CHECK-NEXT: st.param.b32 [func_retval0+8], 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <7 x i16> zeroinitializer } @@ -145,12 +134,11 @@ define <7 x i16> @short7() { define <5 x i16> @short5() { ; CHECK-LABEL: short5( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b16 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.b16 [func_retval0+8], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0+8], 0; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <5 x i16> zeroinitializer } @@ -158,11 +146,10 @@ define <5 x i16> @short5() { define <4 x i16> @short4() { ; CHECK-LABEL: short4( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <4 x i16> zeroinitializer } @@ -170,12 +157,11 @@ define <4 x i16> @short4() { define <3 x i16> @short3() { ; CHECK-LABEL: short3( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs1, %rs1}; -; CHECK-NEXT: st.param.b16 [func_retval0+4], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0+4], 0; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <3 x i16> zeroinitializer } @@ -183,11 +169,10 @@ define <3 x i16> @short3() { define <2 x i16> @short2() { ; CHECK-LABEL: short2( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <2 x i16> zeroinitializer } @@ -195,11 +180,10 @@ define <2 x i16> @short2() { define <1 x i16> @short1() { ; CHECK-LABEL: short1( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; +; CHECK-NEXT: st.param.b16 [func_retval0], 0; ; CHECK-NEXT: ret; ret <1 x i16> zeroinitializer } @@ -207,15 +191,12 @@ define <1 x i16> @short1() { define <17 x i8> @byte17() { ; CHECK-LABEL: byte17( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+16], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+16], 0; +; CHECK-NEXT: st.param.b64 [func_retval0+8], 0; +; CHECK-NEXT: st.param.b64 [func_retval0], 0; ; CHECK-NEXT: ret; ret <17 x i8> zeroinitializer } @@ -223,11 +204,10 @@ define <17 x i8> @byte17() { define <16 x i8> @byte16() { ; CHECK-LABEL: byte16( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r1, %r1, %r1, %r1}; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {0, 0, 0, 0}; ; CHECK-NEXT: ret; ret <16 x i8> zeroinitializer } @@ -235,15 +215,13 @@ define <16 x i8> @byte16() { define <15 x i8> @byte15() { ; CHECK-LABEL: byte15( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+14], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+14], 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {0, 0}; +; CHECK-NEXT: st.param.b32 [func_retval0+8], 0; +; CHECK-NEXT: st.param.b64 [func_retval0], 0; ; CHECK-NEXT: ret; ret <15 x i8> zeroinitializer } @@ -251,13 +229,11 @@ define <15 x i8> @byte15() { define <9 x i8> @byte9() { ; CHECK-LABEL: byte9( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+8], 0; +; CHECK-NEXT: st.param.b64 [func_retval0], 0; ; CHECK-NEXT: ret; ret <9 x i8> zeroinitializer } @@ -265,11 +241,10 @@ define <9 x i8> @byte9() { define <8 x i8> @byte8() { ; CHECK-LABEL: byte8( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r1, %r1}; +; CHECK-NEXT: st.param.v2.b32 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <8 x i8> zeroinitializer } @@ -277,13 +252,12 @@ define <8 x i8> @byte8() { define <7 x i8> @byte7() { ; CHECK-LABEL: byte7( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+6], 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0}; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <7 x i8> zeroinitializer } @@ -291,12 +265,11 @@ define <7 x i8> @byte7() { define <5 x i8> @byte5() { ; CHECK-LABEL: byte5( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+4], 0; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <5 x i8> zeroinitializer } @@ -304,11 +277,10 @@ define <5 x i8> @byte5() { define <4 x i8> @byte4() { ; CHECK-LABEL: byte4( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <4 x i8> zeroinitializer } @@ -316,11 +288,10 @@ define <4 x i8> @byte4() { define <3 x i8> @byte3() { ; CHECK-LABEL: byte3( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <3 x i8> zeroinitializer } @@ -328,11 +299,10 @@ define <3 x i8> @byte3() { define <2 x i8> @byte2() { ; CHECK-LABEL: byte2( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b32 %r1, 0; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <2 x i8> zeroinitializer } @@ -340,11 +310,10 @@ define <2 x i8> @byte2() { define <1 x i8> @byte1() { ; CHECK-LABEL: byte1( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <1 x i8> zeroinitializer } @@ -352,15 +321,14 @@ define <1 x i8> @byte1() { define <17 x i1> @bit17() { ; CHECK-LABEL: bit17( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v4.b8 [func_retval0], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+4], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+8], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.v4.b8 [func_retval0+12], {%rs1, %rs1, %rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+16], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+16], 0; +; CHECK-NEXT: st.param.b32 [func_retval0+12], 0; +; CHECK-NEXT: st.param.b32 [func_retval0+8], 0; +; CHECK-NEXT: st.param.b32 [func_retval0+4], 0; +; CHECK-NEXT: st.param.b32 [func_retval0], 0; ; CHECK-NEXT: ret; ret <17 x i1> zeroinitializer } @@ -368,18 +336,17 @@ define <17 x i1> @bit17() { define <16 x i1> @bit16() { ; CHECK-LABEL: bit16( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+14], {%rs1, %rs1}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+14], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <16 x i1> zeroinitializer } @@ -387,18 +354,17 @@ define <16 x i1> @bit16() { define <15 x i1> @bit15() { ; CHECK-LABEL: bit15( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {%rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+14], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+14], 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+12], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+10], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+8], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <15 x i1> zeroinitializer } @@ -406,15 +372,14 @@ define <15 x i1> @bit15() { define <9 x i1> @bit9() { ; CHECK-LABEL: bit9( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.v2.b8 [func_retval0], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {%rs1, %rs1}; -; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {%rs1, %rs1}; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+8], 0; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+6], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+4], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0+2], {0, 0}; +; CHECK-NEXT: st.param.v2.b8 [func_retval0], {0, 0}; ; CHECK-NEXT: ret; ret <9 x i1> zeroinitializer } @@ -422,18 +387,17 @@ define <9 x i1> @bit9() { define <8 x i1> @bit8() { ; CHECK-LABEL: bit8( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+5], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+7], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+7], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+6], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+5], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+4], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+3], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+2], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+1], 0; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <8 x i1> zeroinitializer } @@ -441,17 +405,16 @@ define <8 x i1> @bit8() { define <7 x i1> @bit7() { ; CHECK-LABEL: bit7( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+5], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+6], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+5], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+4], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+3], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+2], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+1], 0; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <7 x i1> zeroinitializer } @@ -459,15 +422,14 @@ define <7 x i1> @bit7() { define <5 x i1> @bit5() { ; CHECK-LABEL: bit5( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+4], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+3], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+2], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+1], 0; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <5 x i1> zeroinitializer } @@ -475,14 +437,13 @@ define <5 x i1> @bit5() { define <4 x i1> @bit4() { ; CHECK-LABEL: bit4( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+3], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+2], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+1], 0; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <4 x i1> zeroinitializer } @@ -490,13 +451,12 @@ define <4 x i1> @bit4() { define <3 x i1> @bit3() { ; CHECK-LABEL: bit3( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+2], 0; +; CHECK-NEXT: st.param.b8 [func_retval0+1], 0; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <3 x i1> zeroinitializer } @@ -504,12 +464,11 @@ define <3 x i1> @bit3() { define <2 x i1> @bit2() { ; CHECK-LABEL: bit2( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; -; CHECK-NEXT: st.param.b8 [func_retval0+1], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0+1], 0; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <2 x i1> zeroinitializer } @@ -517,11 +476,10 @@ define <2 x i1> @bit2() { define <1 x i1> @bit1() { ; CHECK-LABEL: bit1( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0; -; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; +; CHECK-NEXT: st.param.b8 [func_retval0], 0; ; CHECK-NEXT: ret; ret <1 x i1> zeroinitializer } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected index f466b1de9fb5a..51cafacd57ad5 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected @@ -54,8 +54,8 @@ define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly by ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [callee_St8x4_param_0]; ; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [callee_St8x4_param_0+16]; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; ; CHECK-NEXT: st.param.v2.b64 [func_retval0+16], {%rd3, %rd4}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2}; ; CHECK-NEXT: ret; %1 = load i64, ptr %in, align 8 %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1