diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index c2fca79979e1b..41fa6a6dad98e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -475,6 +475,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { EVT EltVT = VT.getVectorElementType(); SDLoc DL(N); SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); + unsigned NumRegs = EltVT.getSizeInBits() / 32; + bool IsGCN = TM.getTargetTriple().isAMDGCN(); if (NumVectorElts == 1) { CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), @@ -482,7 +484,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { return; } - bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN(); if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 && CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) { uint64_t C = 0; @@ -511,8 +512,10 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { } } - assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not " - "supported yet"); + assert(NumVectorElts <= 32 && + "Vectors with more than 32 elements are not supported yet"); + assert((IsGCN || (!IsGCN && NumRegs == 1)) && + "R600 does not support 64-bit reg_seq elements"); // 32 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class @@ -527,8 +530,9 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { IsRegSeq = false; break; } - unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) - : R600RegisterInfo::getSubRegFromChannel(i); + unsigned Sub = + IsGCN ? 
SIRegisterInfo::getSubRegFromChannel(i * NumRegs, NumRegs) + : R600RegisterInfo::getSubRegFromChannel(i); RegSeqArgs[1 + (2 * i)] = N->getOperand(i); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); } @@ -538,8 +542,9 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, EltVT); for (unsigned i = NOps; i < NumVectorElts; ++i) { - unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) - : R600RegisterInfo::getSubRegFromChannel(i); + unsigned Sub = + IsGCN ? SIRegisterInfo::getSubRegFromChannel(i * NumRegs, NumRegs) + : R600RegisterInfo::getSubRegFromChannel(i); RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); @@ -707,9 +712,13 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { break; } - assert(VT.getVectorElementType().bitsEq(MVT::i32)); + EVT VET = VT.getVectorElementType(); + assert((VET.bitsEq(MVT::i32) || VET.bitsEq(MVT::i64)) && + "Only 32-bit and 64-bit vector elements supported"); + unsigned EltSize = VET.getSizeInBits(); unsigned RegClassID = - SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID(); + SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * EltSize) + ->getID(); SelectBuildVector(N, RegClassID); return; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 015f8fe49ebcf..452bbd735e57a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -360,9 +360,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, // Most operations are naturally 32-bit vector operations. We only support // load and store of i64 vectors, so promote v2i64 vector operations to v4i32. 
for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) { - setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); - AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); - + if (!STI.hasMovB64()) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); + } setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32); @@ -374,9 +375,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } for (MVT Vec64 : {MVT::v3i64, MVT::v3f64}) { - setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); - AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32); - + if (!STI.hasMovB64()) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32); + } setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32); @@ -388,9 +390,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } for (MVT Vec64 : {MVT::v4i64, MVT::v4f64}) { - setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); - AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32); - + if (!STI.hasMovB64()) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32); + } setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32); @@ -402,9 +405,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } for (MVT Vec64 : {MVT::v8i64, MVT::v8f64}) { - setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); - AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32); - + if (!STI.hasMovB64()) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32); + } setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32); @@ -416,9 
+420,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   }
 
   for (MVT Vec64 : {MVT::v16i64, MVT::v16f64}) {
-    setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
-    AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
-
+    if (!STI.hasMovB64()) {
+      setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+      AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
+    }
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
     AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32);
 
@@ -977,6 +982,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
   setTargetDAGCombine({ISD::ADD,
                        ISD::PTRADD,
+                       ISD::BUILD_VECTOR,
                        ISD::UADDO_CARRY,
                        ISD::SUB,
                        ISD::USUBO_CARRY,
@@ -15103,6 +15109,22 @@ bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
       EltSize, NumElem, Idx->isDivergent(), getSubtarget());
 }
 
+/// Translate element index \p Idx of vector type \p From into an element index
+/// of vector type \p To by scaling with the element-count ratio, i.e.
+/// (Idx * ToNumElts) / FromNumElts using integer division. \p From and \p To
+/// must be vector types of identical total bit width (asserted).
+static unsigned getMappedVectorIndex(unsigned Idx, EVT From, EVT To) {
+  assert(From.isVector() && To.isVector() &&
+         "Expected From and To types to be vector types.");
+  assert(From.getSizeInBits() == To.getSizeInBits() &&
+         "Expected From and To vector types require to have the same size.");
+
+  unsigned FromNumElts = From.getVectorNumElements();
+  unsigned ToNumElts = To.getVectorNumElements();
+
+  return (Idx * ToNumElts) / FromNumElts;
+}
+
 SDValue
 SITargetLowering::performExtractVectorEltCombine(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
@@ -15186,6 +15204,27 @@ SITargetLowering::performExtractVectorEltCombine(SDNode *N,
     }
   }
 
+  // if PeekThroughBitcast(Vec)[MapIdx(CIdx)] == undef &&
+  // VecEltSize < PeekThroughEltSize, then
+  // EXTRACT_VECTOR_ELT(bitcast(build_vector(..., undef, ...)), CIdx) => undef
+  auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  SDValue PeekThroughVec = peekThroughBitcasts(Vec);
+  EVT PeekThroughVecVT = PeekThroughVec.getValueType();
+  if (IndexC && PeekThroughVec.getOpcode() == ISD::BUILD_VECTOR &&
+      PeekThroughVecVT.isFixedLengthVector()) {
+    EVT PeekThroughVecEltVT =
PeekThroughVecVT.getVectorElementType();
+    // Small elt size vectors to big elt size vectors are the cases covered for
+    // now (e.g., v4i32 bitcast(v2i64)) which may be conservative.
+    if (VecEltSize < PeekThroughVecEltVT.getSizeInBits()) {
+      unsigned IndexVal = IndexC->getZExtValue();
+      unsigned MappedIndexVal =
+          getMappedVectorIndex(IndexVal, VecVT, PeekThroughVecVT);
+      SDValue PeekThroughElt = PeekThroughVec.getOperand(MappedIndexVal);
+      if (PeekThroughElt.isUndef())
+        return DAG.getNode(PeekThroughElt.getOpcode(), SDLoc(), VecEltVT);
+    }
+  }
+
   // EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
   if (shouldExpandVectorDynExt(N)) {
     SDLoc SL(N);
@@ -16792,6 +16831,102 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N,
                      SelectLHS, SelectRHS);
 }
 
+/// Repack a constant BUILD_VECTOR whose elements are narrower than 64 bits
+/// into i64 constants: a single i64 constant for a 64-bit vector, otherwise a
+/// BUILD_VECTOR of i64, bitcast back to the original type. Only runs after DAG
+/// legalization on subtargets where hasMovB64() is true. Returns an empty
+/// SDValue (no change) if an operand is not a constant, if a nested constant
+/// BUILD_VECTOR operand has undef elements, or if any packed 64-bit value does
+/// not fit in an unsigned 32-bit immediate.
+SDValue
+SITargetLowering::performBuildVectorCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  // TODO: legalize for all targets instead of just v_mov_b64 enabled ones,
+  // legalizing could still enable s_mov_b64 which is supported on all targets.
+  const GCNSubtarget *ST = getSubtarget();
+  if (DCI.Level < AfterLegalizeDAG || !ST->hasMovB64())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned SizeBits = VT.getSizeInBits();
+  unsigned EltSize = EltVT.getSizeInBits();
+
+  // Skip if:
+  // - Value type isn't multiple of 64 bit (e.g., v3i32), or
+  // - Element type has already been combined into 64b elements, or
+  // - Elements cannot be packed evenly into 64b chunks (the accumulator below
+  //   relies on reaching exactly 64 bits).
+  if ((SizeBits % 64) != 0 || EltSize >= 64 || (64 % EltSize) != 0)
+    return SDValue();
+
+  // Construct the 64b values.
+  SmallVector<uint64_t> ImmVals;
+  uint64_t ImmVal = 0;
+  uint64_t ImmSize = 0;
+  for (SDValue Opand : N->ops()) {
+    // Build_vector with constants only.
+    auto *C = dyn_cast<ConstantSDNode>(Opand);
+    auto *FPC = dyn_cast<ConstantFPSDNode>(Opand);
+    auto *BV = dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Opand));
+
+    if (!C && !FPC && !BV)
+      return SDValue();
+
+    uint64_t Val = 0;
+    if (BV) {
+      if (!BV->isConstant())
+        return SDValue();
+      bool IsLE = DAG.getDataLayout().isLittleEndian();
+      BitVector UndefElements;
+      SmallVector<APInt> RawBits;
+      if (!BV->getConstantRawBits(IsLE, EltSize, RawBits, UndefElements))
+        return SDValue();
+
+      assert(RawBits.size() == 1 &&
+             "BuildVector constant value retrieval expected 1 element");
+
+      if (UndefElements.any())
+        return SDValue();
+
+      Val = RawBits[0].getZExtValue();
+    } else {
+      Val = C ? C->getZExtValue()
+              : FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+      // BUILD_VECTOR operands may be wider than the element type and are
+      // implicitly truncated; drop the excess bits before packing.
+      Val &= maskTrailingOnes<uint64_t>(EltSize);
+    }
+    ImmVal |= Val << ImmSize;
+    ImmSize += EltSize;
+    if (ImmSize == 64) {
+      if (!isUInt<32>(ImmVal))
+        return SDValue();
+      ImmVals.push_back(ImmVal);
+      ImmVal = 0;
+      ImmSize = 0;
+    }
+  }
+
+  if (ImmVals.empty())
+    return SDValue();
+
+  // Avoid emitting build_vector with 1 element and directly emit value.
+  if (ImmVals.size() == 1)
+    return DAG.getBitcast(VT, DAG.getConstant(ImmVals[0], SL, MVT::i64));
+
+  // Construct and return build_vector with 64b elements.
+  SmallVector<SDValue> VectorConsts;
+  VectorConsts.reserve(ImmVals.size());
+  for (uint64_t Imm : ImmVals)
+    VectorConsts.push_back(DAG.getConstant(Imm, SL, MVT::i64));
+  EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i64, SizeBits / 64);
+  return DAG.getBitcast(VT, DAG.getBuildVector(NewVT, SL, VectorConsts));
+}
+
 SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -16885,6 +17013,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     return performFCanonicalizeCombine(N, DCI);
   case AMDGPUISD::RCP:
     return performRcpCombine(N, DCI);
+  case ISD::BUILD_VECTOR:
+    return performBuildVectorCombine(N, DCI);
   case ISD::FLDEXP:
   case AMDGPUISD::FRACT:
   case AMDGPUISD::RSQ:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index ba408a8f64540..326c27cbc6e9f 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -245,6 +245,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
   bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
 
diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
index d053425afbb6d..51731b1d4dcdf 100644
--- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
@@ -10257,48 +10257,48 @@ define void @flat_atomic_fsub_f64_ret_av_av(ptr %ptr) #0 {
 ; GFX950-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:    s_mov_b64 s[2:3], 0x50
 ; GFX950-NEXT:    s_mov_b64 s[0:1],
src_private_base -; GFX950-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[2:3] -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 +; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[2:3] +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[2:3] +; GFX950-NEXT: ; def v[4:5] ; GFX950-NEXT: ;;#ASMEND -; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB130_4 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global -; GFX950-NEXT: flat_load_dwordx2 v[4:5], v[0:1] +; GFX950-NEXT: flat_load_dwordx2 v[0:1], v[2:3] ; GFX950-NEXT: s_mov_b64 s[2:3], 0 ; GFX950-NEXT: .LBB130_2: ; %atomicrmw.start ; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX950-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX950-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] sc0 +; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[0:1] +; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[4:5] +; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX950-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX950-NEXT: s_cbranch_execnz .LBB130_2 ; GFX950-NEXT: ; %bb.3: ; %Flow ; GFX950-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX950-NEXT: .LBB130_4: ; %Flow3 ; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB130_6 ; GFX950-NEXT: ; %bb.5: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: 
v_cndmask_b32_e32 v6, -1, v0, vcc -; GFX950-NEXT: scratch_load_dwordx2 v[4:5], v6, off +; GFX950-NEXT: v_cndmask_b32_e32 v6, -1, v2, vcc +; GFX950-NEXT: scratch_load_dwordx2 v[0:1], v6, off ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3] -; GFX950-NEXT: scratch_store_dwordx2 v6, v[0:1], off +; GFX950-NEXT: v_add_f64 v[2:3], v[0:1], -v[4:5] +; GFX950-NEXT: scratch_store_dwordx2 v6, v[2:3], off ; GFX950-NEXT: .LBB130_6: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[4:5] +; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll index c98fff96d7b8a..da976d72b837e 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-global-atomicrmw.ll @@ -6823,26 +6823,26 @@ define void @global_atomic_fsub_f64_ret_av_av(ptr addrspace(1) %ptr) #0 { ; GFX950-LABEL: global_atomic_fsub_f64_ret_av_av: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: global_load_dwordx2 v[4:5], v[0:1], off offset:80 +; GFX950-NEXT: global_load_dwordx2 v[2:3], v[0:1], off offset:80 ; GFX950-NEXT: s_mov_b64 s[0:1], 0 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[2:3] +; GFX950-NEXT: ; def v[4:5] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: .LBB130_1: ; %atomicrmw.start ; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX950-NEXT: v_add_f64 v[4:5], v[6:7], -v[2:3] -; GFX950-NEXT: global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off offset:80 sc0 +; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[2:3] +; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[4:5] +; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9], off offset:80 sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: 
v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7] +; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] ; GFX950-NEXT: s_or_b64 s[0:1], vcc, s[0:1] ; GFX950-NEXT: s_andn2_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_cbranch_execnz .LBB130_1 ; GFX950-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[4:5] +; GFX950-NEXT: ; use v[2:3] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: s_setpc_b64 s[30:31] %gep.0 = getelementptr inbounds [512 x double], ptr addrspace(1) %ptr, i64 0, i64 10 @@ -12626,26 +12626,26 @@ define void @global_atomic_fsub_f64_saddr_ret_av_av(ptr addrspace(1) inreg %ptr) ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: v_mov_b32_e32 v4, 0 -; GFX950-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1] offset:80 +; GFX950-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1] offset:80 ; GFX950-NEXT: s_mov_b64 s[2:3], 0 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[2:3] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: .LBB238_1: ; %atomicrmw.start ; GFX950-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[2:3] -; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[0:1] -; GFX950-NEXT: global_atomic_cmpswap_x2 v[2:3], v4, v[6:9], s[0:1] offset:80 sc0 +; GFX950-NEXT: v_mov_b64_e32 v[8:9], v[0:1] +; GFX950-NEXT: v_add_f64 v[6:7], v[8:9], -v[2:3] +; GFX950-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[6:9], s[0:1] offset:80 sc0 ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: s_or_b64 s[2:3], vcc, s[2:3] ; GFX950-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GFX950-NEXT: s_cbranch_execnz .LBB238_1 ; GFX950-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX950-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[2:3] +; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: 
s_setpc_b64 s[30:31] %gep.0 = getelementptr inbounds [512 x double], ptr addrspace(1) %ptr, i64 0, i64 10 diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 44c719f3635c8..8357ffab5e5da 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -6952,50 +6952,95 @@ define <32 x float> @global_extload_v32bf16_to_v32f32(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_and_b32_e32 v31, 0xffff0000, v31 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: global_extload_v32bf16_to_v32f32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off -; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 -; GFX9-NEXT: s_waitcnt vmcnt(3) -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v4 -; GFX9-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v5 -; GFX9-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v6 -; GFX9-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 -; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 -; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v12 -; GFX9-NEXT: v_and_b32_e32 v9, 0xffff0000, v12 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 16, v13 -; GFX9-NEXT: v_and_b32_e32 v11, 0xffff0000, v13 -; GFX9-NEXT: v_lshlrev_b32_e32 v12, 16, v14 -; GFX9-NEXT: v_and_b32_e32 v13, 0xffff0000, v14 -; GFX9-NEXT: v_lshlrev_b32_e32 v14, 16, v15 -; GFX9-NEXT: v_and_b32_e32 v15, 0xffff0000, v15 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b32_e32 v16, 16, v20 -; GFX9-NEXT: v_and_b32_e32 v17, 0xffff0000, v20 -; GFX9-NEXT: v_lshlrev_b32_e32 v18, 16, v21 -; GFX9-NEXT: v_and_b32_e32 v19, 0xffff0000, v21 -; GFX9-NEXT: v_lshlrev_b32_e32 v20, 16, v22 -; GFX9-NEXT: v_and_b32_e32 v21, 0xffff0000, v22 -; GFX9-NEXT: 
v_lshlrev_b32_e32 v22, 16, v23 -; GFX9-NEXT: v_and_b32_e32 v23, 0xffff0000, v23 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v24, 16, v28 -; GFX9-NEXT: v_and_b32_e32 v25, 0xffff0000, v28 -; GFX9-NEXT: v_lshlrev_b32_e32 v26, 16, v29 -; GFX9-NEXT: v_and_b32_e32 v27, 0xffff0000, v29 -; GFX9-NEXT: v_lshlrev_b32_e32 v28, 16, v30 -; GFX9-NEXT: v_and_b32_e32 v29, 0xffff0000, v30 -; GFX9-NEXT: v_lshlrev_b32_e32 v30, 16, v31 -; GFX9-NEXT: v_and_b32_e32 v31, 0xffff0000, v31 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: global_extload_v32bf16_to_v32f32: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: global_load_dwordx4 v[4:7], v[0:1], off +; GFX900-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:16 +; GFX900-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 +; GFX900-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 +; GFX900-NEXT: s_waitcnt vmcnt(3) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v4 +; GFX900-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v5 +; GFX900-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 +; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v6 +; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 +; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v7 +; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 +; GFX900-NEXT: s_waitcnt vmcnt(2) +; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v12 +; GFX900-NEXT: v_and_b32_e32 v9, 0xffff0000, v12 +; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v13 +; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v13 +; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v14 +; GFX900-NEXT: v_and_b32_e32 v13, 0xffff0000, v14 +; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v15 +; GFX900-NEXT: v_and_b32_e32 v15, 0xffff0000, v15 +; GFX900-NEXT: s_waitcnt vmcnt(1) +; GFX900-NEXT: v_lshlrev_b32_e32 v16, 16, v20 +; GFX900-NEXT: v_and_b32_e32 v17, 0xffff0000, v20 +; GFX900-NEXT: v_lshlrev_b32_e32 v18, 16, v21 +; GFX900-NEXT: v_and_b32_e32 v19, 0xffff0000, v21 +; GFX900-NEXT: 
v_lshlrev_b32_e32 v20, 16, v22 +; GFX900-NEXT: v_and_b32_e32 v21, 0xffff0000, v22 +; GFX900-NEXT: v_lshlrev_b32_e32 v22, 16, v23 +; GFX900-NEXT: v_and_b32_e32 v23, 0xffff0000, v23 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v28 +; GFX900-NEXT: v_and_b32_e32 v25, 0xffff0000, v28 +; GFX900-NEXT: v_lshlrev_b32_e32 v26, 16, v29 +; GFX900-NEXT: v_and_b32_e32 v27, 0xffff0000, v29 +; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v30 +; GFX900-NEXT: v_and_b32_e32 v29, 0xffff0000, v30 +; GFX900-NEXT: v_lshlrev_b32_e32 v30, 16, v31 +; GFX900-NEXT: v_and_b32_e32 v31, 0xffff0000, v31 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: global_extload_v32bf16_to_v32f32: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 +; GFX950-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32 +; GFX950-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 +; GFX950-NEXT: global_load_dwordx4 v[32:35], v[0:1], off +; GFX950-NEXT: s_waitcnt vmcnt(3) +; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v2 +; GFX950-NEXT: v_and_b32_e32 v9, 0xffff0000, v2 +; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v3 +; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v3 +; GFX950-NEXT: v_lshlrev_b32_e32 v12, 16, v4 +; GFX950-NEXT: v_and_b32_e32 v13, 0xffff0000, v4 +; GFX950-NEXT: v_lshlrev_b32_e32 v14, 16, v5 +; GFX950-NEXT: v_and_b32_e32 v15, 0xffff0000, v5 +; GFX950-NEXT: s_waitcnt vmcnt(2) +; GFX950-NEXT: v_lshlrev_b32_e32 v16, 16, v20 +; GFX950-NEXT: v_and_b32_e32 v17, 0xffff0000, v20 +; GFX950-NEXT: v_lshlrev_b32_e32 v18, 16, v21 +; GFX950-NEXT: v_and_b32_e32 v19, 0xffff0000, v21 +; GFX950-NEXT: v_lshlrev_b32_e32 v20, 16, v22 +; GFX950-NEXT: v_and_b32_e32 v21, 0xffff0000, v22 +; GFX950-NEXT: v_lshlrev_b32_e32 v22, 16, v23 +; GFX950-NEXT: v_and_b32_e32 v23, 0xffff0000, v23 +; GFX950-NEXT: s_waitcnt vmcnt(1) +; GFX950-NEXT: v_lshlrev_b32_e32 v24, 16, v28 +; GFX950-NEXT: v_and_b32_e32 v25, 
0xffff0000, v28 +; GFX950-NEXT: v_lshlrev_b32_e32 v26, 16, v29 +; GFX950-NEXT: v_and_b32_e32 v27, 0xffff0000, v29 +; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v30 +; GFX950-NEXT: v_and_b32_e32 v29, 0xffff0000, v30 +; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v31 +; GFX950-NEXT: v_and_b32_e32 v31, 0xffff0000, v31 +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v32 +; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v32 +; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v33 +; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v33 +; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v34 +; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v34 +; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v35 +; GFX950-NEXT: v_and_b32_e32 v7, 0xffff0000, v35 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v32bf16_to_v32f32: ; GFX10: ; %bb.0: @@ -7151,10 +7196,10 @@ define <2 x double> @global_extload_v2bf16_to_v2f64(ptr addrspace(1) %ptr) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: global_load_dword v0, v[0:1], off ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v0 -; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v0 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v1 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 +; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v2bf16_to_v2f64: @@ -7250,12 +7295,12 @@ define <3 x double> @global_extload_v3bf16_to_v3f64(ptr addrspace(1) %ptr) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v1 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], 
v3 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v0 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v3bf16_to_v3f64: @@ -7344,20 +7389,35 @@ define <4 x double> @global_extload_v4bf16_to_v4f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: global_extload_v4bf16_to_v4f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX9-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v1 -; GFX9-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: global_extload_v4bf16_to_v4f64: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX900-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v1 +; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v4 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: global_extload_v4bf16_to_v4f64: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: global_load_dwordx2 v[0:1], v[0:1], off +; GFX950-NEXT: 
s_waitcnt vmcnt(0) +; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v1 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v3 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v4bf16_to_v4f64: ; GFX10: ; %bb.0: @@ -7459,22 +7519,39 @@ define <5 x double> @global_extload_v5bf16_to_v5f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: global_extload_v5bf16_to_v5f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX9-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 16, v1 -; GFX9-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v2 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v3 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v4 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v5 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: global_extload_v5bf16_to_v5f64: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 +; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1 +; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 +; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v2 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v3 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v4 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v5 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 +; GFX900-NEXT: 
v_cvt_f64_f32_e32 v[8:9], v8 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: global_extload_v5bf16_to_v5f64: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v1 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX950-NEXT: v_and_b32_e32 v10, 0xffff0000, v0 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v3 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v10 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v5bf16_to_v5f64: ; GFX10: ; %bb.0: @@ -7580,24 +7657,43 @@ define <6 x double> @global_extload_v6bf16_to_v6f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_cvt_f64_f32_e32 v[10:11], v10 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: global_extload_v6bf16_to_v6f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX9-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 16, v1 -; GFX9-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v2 -; GFX9-NEXT: v_and_b32_e32 v10, 0xffff0000, v2 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v3 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v4 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v5 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[10:11], v10 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: global_extload_v6bf16_to_v6f64: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: global_load_dwordx3 v[0:2], v[0:1], off +; 
GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v3, 16, v0 +; GFX900-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 +; GFX900-NEXT: v_lshlrev_b32_e32 v5, 16, v1 +; GFX900-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 +; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v2 +; GFX900-NEXT: v_and_b32_e32 v10, 0xffff0000, v2 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v3 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v4 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v5 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[10:11], v10 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: global_extload_v6bf16_to_v6f64: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: global_load_dwordx3 v[0:2], v[0:1], off +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_and_b32_e32 v3, 0xffff0000, v2 +; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v1 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX950-NEXT: v_and_b32_e32 v12, 0xffff0000, v0 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v3 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v4 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v12 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v6bf16_to_v6f64: ; GFX10: ; %bb.0: @@ -7719,28 +7815,51 @@ define <8 x double> @global_extload_v8bf16_to_v8f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_cvt_f64_f32_e32 v[14:15], v14 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: global_extload_v8bf16_to_v8f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v0 -; GFX9-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 -; GFX9-NEXT: 
v_lshlrev_b32_e32 v6, 16, v1 -; GFX9-NEXT: v_and_b32_e32 v7, 0xffff0000, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v8, 16, v2 -; GFX9-NEXT: v_and_b32_e32 v10, 0xffff0000, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v12, 16, v3 -; GFX9-NEXT: v_and_b32_e32 v14, 0xffff0000, v3 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v4 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v5 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v7 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[10:11], v10 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[12:13], v12 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[14:15], v14 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: global_extload_v8bf16_to_v8f64: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v4, 16, v0 +; GFX900-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 +; GFX900-NEXT: v_lshlrev_b32_e32 v6, 16, v1 +; GFX900-NEXT: v_and_b32_e32 v7, 0xffff0000, v1 +; GFX900-NEXT: v_lshlrev_b32_e32 v8, 16, v2 +; GFX900-NEXT: v_and_b32_e32 v10, 0xffff0000, v2 +; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v3 +; GFX900-NEXT: v_and_b32_e32 v14, 0xffff0000, v3 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v4 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v5 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v6 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v7 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[10:11], v10 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[12:13], v12 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[14:15], v14 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: global_extload_v8bf16_to_v8f64: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v3 +; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX950-NEXT: v_and_b32_e32 v5, 
0xffff0000, v2 +; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX950-NEXT: v_and_b32_e32 v6, 0xffff0000, v1 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX950-NEXT: v_and_b32_e32 v16, 0xffff0000, v0 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v4 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v3 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v5 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v16 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v8bf16_to_v8f64: ; GFX10: ; %bb.0: @@ -7926,46 +8045,87 @@ define <16 x double> @global_extload_v16bf16_to_v16f64(ptr addrspace(1) %ptr) { ; GFX8-NEXT: v_cvt_f64_f32_e32 v[30:31], v30 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: global_extload_v16bf16_to_v16f64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off -; GFX9-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:16 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v2 -; GFX9-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v10, 16, v3 -; GFX9-NEXT: v_and_b32_e32 v11, 0xffff0000, v3 -; GFX9-NEXT: v_lshlrev_b32_e32 v12, 16, v4 -; GFX9-NEXT: v_and_b32_e32 v13, 0xffff0000, v4 -; GFX9-NEXT: v_lshlrev_b32_e32 v14, 16, v5 -; GFX9-NEXT: v_and_b32_e32 v15, 0xffff0000, v5 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshlrev_b32_e32 v16, 16, v6 -; GFX9-NEXT: v_and_b32_e32 v18, 0xffff0000, v6 -; GFX9-NEXT: v_lshlrev_b32_e32 v20, 16, v7 -; GFX9-NEXT: v_and_b32_e32 v22, 0xffff0000, v7 -; GFX9-NEXT: v_lshlrev_b32_e32 v24, 16, v8 -; GFX9-NEXT: v_and_b32_e32 v26, 0xffff0000, v8 -; GFX9-NEXT: v_lshlrev_b32_e32 v28, 16, v9 -; GFX9-NEXT: v_and_b32_e32 v30, 0xffff0000, v9 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 -; 
GFX9-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[4:5], v10 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[6:7], v11 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[8:9], v12 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[10:11], v13 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[12:13], v14 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[14:15], v15 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[16:17], v16 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[18:19], v18 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[20:21], v20 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[22:23], v22 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[24:25], v24 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[26:27], v26 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[28:29], v28 -; GFX9-NEXT: v_cvt_f64_f32_e32 v[30:31], v30 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: global_extload_v16bf16_to_v16f64: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: global_load_dwordx4 v[2:5], v[0:1], off +; GFX900-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:16 +; GFX900-NEXT: s_waitcnt vmcnt(1) +; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v2 +; GFX900-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 +; GFX900-NEXT: v_lshlrev_b32_e32 v10, 16, v3 +; GFX900-NEXT: v_and_b32_e32 v11, 0xffff0000, v3 +; GFX900-NEXT: v_lshlrev_b32_e32 v12, 16, v4 +; GFX900-NEXT: v_and_b32_e32 v13, 0xffff0000, v4 +; GFX900-NEXT: v_lshlrev_b32_e32 v14, 16, v5 +; GFX900-NEXT: v_and_b32_e32 v15, 0xffff0000, v5 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_lshlrev_b32_e32 v16, 16, v6 +; GFX900-NEXT: v_and_b32_e32 v18, 0xffff0000, v6 +; GFX900-NEXT: v_lshlrev_b32_e32 v20, 16, v7 +; GFX900-NEXT: v_and_b32_e32 v22, 0xffff0000, v7 +; GFX900-NEXT: v_lshlrev_b32_e32 v24, 16, v8 +; GFX900-NEXT: v_and_b32_e32 v26, 0xffff0000, v8 +; GFX900-NEXT: v_lshlrev_b32_e32 v28, 16, v9 +; GFX900-NEXT: v_and_b32_e32 v30, 0xffff0000, v9 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[4:5], v10 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[6:7], v11 +; GFX900-NEXT: 
v_cvt_f64_f32_e32 v[8:9], v12 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[10:11], v13 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[12:13], v14 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[14:15], v15 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[16:17], v16 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[18:19], v18 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[20:21], v20 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[22:23], v22 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[24:25], v24 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[26:27], v26 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[28:29], v28 +; GFX900-NEXT: v_cvt_f64_f32_e32 v[30:31], v30 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: global_extload_v16bf16_to_v16f64: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:16 +; GFX950-NEXT: global_load_dwordx4 v[6:9], v[0:1], off +; GFX950-NEXT: s_waitcnt vmcnt(1) +; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v5 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v5 +; GFX950-NEXT: v_and_b32_e32 v5, 0xffff0000, v4 +; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX950-NEXT: v_and_b32_e32 v10, 0xffff0000, v3 +; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX950-NEXT: v_and_b32_e32 v11, 0xffff0000, v2 +; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_and_b32_e32 v12, 0xffff0000, v9 +; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v9 +; GFX950-NEXT: v_and_b32_e32 v32, 0xffff0000, v8 +; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v8 +; GFX950-NEXT: v_and_b32_e32 v33, 0xffff0000, v7 +; GFX950-NEXT: v_lshlrev_b32_e32 v34, 16, v7 +; GFX950-NEXT: v_and_b32_e32 v35, 0xffff0000, v6 +; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v6 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[30:31], v0 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[28:29], v1 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[26:27], v5 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[24:25], v4 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[22:23], v10 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[20:21], v3 +; GFX950-NEXT: v_cvt_f64_f32_e32 
v[18:19], v11 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[16:17], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v12 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v9 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v32 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v8 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v33 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v34 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v35 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[0:1], v36 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: global_extload_v16bf16_to_v16f64: ; GFX10: ; %bb.0: @@ -9068,141 +9228,150 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; GFX950-NEXT: global_load_ushort v1, v[2:3], off offset:2 -; GFX950-NEXT: global_load_ushort v4, v[2:3], off offset:12 -; GFX950-NEXT: global_load_ushort v5, v[2:3], off offset:8 -; GFX950-NEXT: global_load_ushort v6, v[2:3], off offset:4 -; GFX950-NEXT: global_load_ushort v7, v[2:3], off -; GFX950-NEXT: global_load_ushort v8, v[2:3], off offset:6 -; GFX950-NEXT: global_load_ushort v9, v[2:3], off offset:10 -; GFX950-NEXT: global_load_ushort v10, v[2:3], off offset:14 -; GFX950-NEXT: global_load_ushort v11, v[2:3], off offset:18 -; GFX950-NEXT: global_load_ushort v12, v[2:3], off offset:28 -; GFX950-NEXT: global_load_ushort v13, v[2:3], off offset:24 -; GFX950-NEXT: global_load_ushort v14, v[2:3], off offset:20 -; GFX950-NEXT: global_load_ushort v15, v[2:3], off offset:16 -; GFX950-NEXT: global_load_ushort v16, v[2:3], off offset:22 -; GFX950-NEXT: global_load_ushort v17, v[2:3], off offset:26 -; GFX950-NEXT: global_load_ushort v18, v[2:3], off offset:30 -; GFX950-NEXT: global_load_ushort v19, v[2:3], off offset:34 -; GFX950-NEXT: 
global_load_ushort v20, v[2:3], off offset:44 -; GFX950-NEXT: global_load_ushort v21, v[2:3], off offset:40 -; GFX950-NEXT: global_load_ushort v22, v[2:3], off offset:36 -; GFX950-NEXT: global_load_ushort v23, v[2:3], off offset:32 -; GFX950-NEXT: global_load_ushort v24, v[2:3], off offset:38 -; GFX950-NEXT: global_load_ushort v25, v[2:3], off offset:42 -; GFX950-NEXT: global_load_ushort v26, v[2:3], off offset:46 -; GFX950-NEXT: global_load_ushort v42, v[2:3], off offset:50 -; GFX950-NEXT: global_load_ushort v43, v[2:3], off offset:62 -; GFX950-NEXT: global_load_ushort v46, v[2:3], off offset:60 -; GFX950-NEXT: global_load_ushort v47, v[2:3], off offset:56 -; GFX950-NEXT: global_load_ushort v60, v[2:3], off offset:52 -; GFX950-NEXT: global_load_ushort v56, v[2:3], off offset:48 -; GFX950-NEXT: global_load_ushort v57, v[2:3], off offset:54 -; GFX950-NEXT: global_load_ushort v58, v[2:3], off offset:58 +; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX950-NEXT: global_load_ushort v1, v[2:3], off offset:30 +; GFX950-NEXT: global_load_ushort v4, v[2:3], off offset:28 +; GFX950-NEXT: global_load_ushort v5, v[2:3], off offset:26 +; GFX950-NEXT: global_load_ushort v6, v[2:3], off offset:24 +; GFX950-NEXT: global_load_ushort v7, v[2:3], off offset:22 +; GFX950-NEXT: global_load_ushort v8, v[2:3], off offset:20 +; GFX950-NEXT: global_load_ushort v9, v[2:3], off offset:18 +; GFX950-NEXT: global_load_ushort v10, v[2:3], off offset:16 +; GFX950-NEXT: global_load_ushort v11, v[2:3], off offset:14 +; GFX950-NEXT: global_load_ushort v12, v[2:3], off offset:12 +; GFX950-NEXT: global_load_ushort v13, v[2:3], off offset:10 +; GFX950-NEXT: global_load_ushort v14, v[2:3], off offset:8 +; GFX950-NEXT: global_load_ushort v15, v[2:3], off offset:6 +; GFX950-NEXT: global_load_ushort v16, v[2:3], off offset:4 +; GFX950-NEXT: global_load_ushort v17, v[2:3], off offset:2 +; GFX950-NEXT: global_load_ushort v18, v[2:3], off +; GFX950-NEXT: global_load_ushort v19, v[2:3], off 
offset:62 +; GFX950-NEXT: global_load_ushort v20, v[2:3], off offset:60 +; GFX950-NEXT: global_load_ushort v21, v[2:3], off offset:58 +; GFX950-NEXT: global_load_ushort v22, v[2:3], off offset:56 +; GFX950-NEXT: global_load_ushort v23, v[2:3], off offset:54 +; GFX950-NEXT: global_load_ushort v24, v[2:3], off offset:52 +; GFX950-NEXT: global_load_ushort v25, v[2:3], off offset:50 +; GFX950-NEXT: global_load_ushort v26, v[2:3], off offset:48 +; GFX950-NEXT: global_load_ushort v42, v[2:3], off offset:46 +; GFX950-NEXT: global_load_ushort v43, v[2:3], off offset:44 +; GFX950-NEXT: global_load_ushort v46, v[2:3], off offset:42 +; GFX950-NEXT: global_load_ushort v47, v[2:3], off offset:40 +; GFX950-NEXT: global_load_ushort v56, v[2:3], off offset:38 +; GFX950-NEXT: global_load_ushort v57, v[2:3], off offset:36 +; GFX950-NEXT: global_load_ushort v60, v[2:3], off offset:34 +; GFX950-NEXT: global_load_ushort v61, v[2:3], off offset:32 ; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: s_waitcnt vmcnt(31) ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX950-NEXT: s_waitcnt vmcnt(30) -; GFX950-NEXT: v_lshlrev_b32_e32 v30, 16, v4 +; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v4 ; GFX950-NEXT: s_waitcnt vmcnt(29) -; GFX950-NEXT: v_lshlrev_b32_e32 v28, 16, v5 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v1 +; GFX950-NEXT: v_lshlrev_b32_e32 v3, 16, v5 +; GFX950-NEXT: s_waitcnt vmcnt(28) +; GFX950-NEXT: v_lshlrev_b32_e32 v4, 16, v6 ; GFX950-NEXT: s_waitcnt vmcnt(27) -; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v7 +; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v7 ; GFX950-NEXT: s_waitcnt vmcnt(26) -; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v8 +; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v8 ; 
GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_lshlrev_b32_e32 v27, 16, v9 +; GFX950-NEXT: v_lshlrev_b32_e32 v7, 16, v9 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[32:33], v1 ; GFX950-NEXT: s_waitcnt vmcnt(24) -; GFX950-NEXT: v_lshlrev_b32_e32 v29, 16, v10 +; GFX950-NEXT: v_lshlrev_b32_e32 v8, 16, v10 ; GFX950-NEXT: s_waitcnt vmcnt(23) -; GFX950-NEXT: v_lshlrev_b32_e32 v31, 16, v11 +; GFX950-NEXT: v_lshlrev_b32_e32 v9, 16, v11 ; GFX950-NEXT: s_waitcnt vmcnt(22) -; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v12 +; GFX950-NEXT: v_lshlrev_b32_e32 v10, 16, v12 ; GFX950-NEXT: s_waitcnt vmcnt(21) -; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v13 +; GFX950-NEXT: v_lshlrev_b32_e32 v11, 16, v13 ; GFX950-NEXT: s_waitcnt vmcnt(20) ; GFX950-NEXT: v_lshlrev_b32_e32 v34, 16, v14 -; GFX950-NEXT: s_waitcnt vmcnt(19) -; GFX950-NEXT: v_lshlrev_b32_e32 v32, 16, v15 ; GFX950-NEXT: s_waitcnt vmcnt(18) -; GFX950-NEXT: v_lshlrev_b32_e32 v33, 16, v16 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v27 +; GFX950-NEXT: v_lshlrev_b32_e32 v36, 16, v16 +; GFX950-NEXT: s_waitcnt vmcnt(17) +; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v17 ; GFX950-NEXT: s_waitcnt vmcnt(16) -; GFX950-NEXT: v_lshlrev_b32_e32 v37, 16, v18 +; GFX950-NEXT: v_lshlrev_b32_e32 v38, 16, v18 ; GFX950-NEXT: s_waitcnt vmcnt(15) ; GFX950-NEXT: v_lshlrev_b32_e32 v39, 16, v19 +; GFX950-NEXT: v_lshlrev_b32_e32 v35, 16, v15 ; GFX950-NEXT: s_waitcnt vmcnt(14) -; GFX950-NEXT: v_lshlrev_b32_e32 v44, 16, v20 +; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v20 ; GFX950-NEXT: s_waitcnt vmcnt(13) -; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v21 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v30 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[20:21], v31 +; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v21 +; GFX950-NEXT: s_waitcnt vmcnt(12) +; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v22 +; GFX950-NEXT: s_waitcnt vmcnt(11) +; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v23 ; GFX950-NEXT: s_waitcnt vmcnt(10) -; GFX950-NEXT: v_lshlrev_b32_e32 v49, 16, v24 -; GFX950-NEXT: s_waitcnt 
vmcnt(9) -; GFX950-NEXT: v_lshlrev_b32_e32 v53, 16, v25 +; GFX950-NEXT: v_lshlrev_b32_e32 v40, 16, v24 ; GFX950-NEXT: s_waitcnt vmcnt(8) -; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v26 +; GFX950-NEXT: v_lshlrev_b32_e32 v44, 16, v26 ; GFX950-NEXT: s_waitcnt vmcnt(7) ; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v42 -; GFX950-NEXT: s_waitcnt vmcnt(6) -; GFX950-NEXT: v_lshlrev_b32_e32 v42, 16, v43 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[18:19], v32 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[24:25], v33 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[26:27], v36 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[32:33], v37 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[30:31], v38 +; GFX950-NEXT: v_lshlrev_b32_e32 v41, 16, v25 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[30:31], v2 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[28:29], v3 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[26:27], v4 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[24:25], v5 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[22:23], v6 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[20:21], v7 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v36 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[4:5], v37 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v38 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[36:37], v39 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[38:39], v44 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[44:45], v42 -; GFX950-NEXT: s_waitcnt vmcnt(5) -; GFX950-NEXT: v_lshlrev_b32_e32 v42, 16, v46 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[42:43], v42 -; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_lshlrev_b32_e32 v46, 16, v58 -; GFX950-NEXT: scratch_store_dwordx4 v0, v[42:45], off offset:240 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[58:59], v46 -; GFX950-NEXT: v_lshlrev_b32_e32 v46, 16, v47 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[44:45], v1 -; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v56 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[42:43], v1 -; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v57 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[56:57], v46 -; GFX950-NEXT: v_lshlrev_b32_e32 v35, 16, v17 -; GFX950-NEXT: v_lshlrev_b32_e32 v48, 16, v23 -; GFX950-NEXT: v_lshlrev_b32_e32 v52, 16, v22 -; 
GFX950-NEXT: scratch_store_dwordx4 v0, v[56:59], off offset:224 -; GFX950-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v28 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[58:59], v1 -; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v60 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[16:17], v29 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[22:23], v34 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[28:29], v35 +; GFX950-NEXT: s_waitcnt vmcnt(6) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v43 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[18:19], v8 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[16:17], v9 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[14:15], v10 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[12:13], v11 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[10:11], v34 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v35 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[34:35], v48 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[42:43], v1 +; GFX950-NEXT: s_waitcnt vmcnt(5) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v46 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[34:37], off offset:240 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[50:51], v49 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[48:49], v52 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[36:37], v1 +; GFX950-NEXT: s_waitcnt vmcnt(5) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v47 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[34:35], v1 +; GFX950-NEXT: s_waitcnt vmcnt(4) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v56 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[58:59], v1 +; GFX950-NEXT: s_waitcnt vmcnt(3) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v57 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[56:57], v1 +; GFX950-NEXT: s_waitcnt vmcnt(2) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v60 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[54:55], v53 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[52:53], v40 ; GFX950-NEXT: v_cvt_f64_f32_e32 v[40:41], v41 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[56:57], v1 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[2:3], v2 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[8:9], v7 -; GFX950-NEXT: v_cvt_f64_f32_e32 v[6:7], v6 -; GFX950-NEXT: scratch_store_dwordx4 v0, v[56:59], off offset:208 -; 
GFX950-NEXT: scratch_store_dwordx4 v0, v[42:45], off offset:192 -; GFX950-NEXT: scratch_store_dwordx4 v0, v[38:41], off offset:176 -; GFX950-NEXT: scratch_store_dwordx4 v0, v[52:55], off offset:160 -; GFX950-NEXT: scratch_store_dwordx4 v0, v[48:51], off offset:144 -; GFX950-NEXT: scratch_store_dwordx4 v0, v[34:37], off offset:128 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[48:51], off offset:224 +; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse +; GFX950-NEXT: v_cvt_f64_f32_e32 v[50:51], v1 +; GFX950-NEXT: s_waitcnt vmcnt(2) +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v61 +; GFX950-NEXT: v_cvt_f64_f32_e32 v[48:49], v1 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[52:55], off offset:208 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[38:41], off offset:192 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[42:45], off offset:176 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[34:37], off offset:160 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[56:59], off offset:144 +; GFX950-NEXT: scratch_store_dwordx4 v0, v[48:51], off offset:128 ; GFX950-NEXT: scratch_store_dwordx4 v0, v[30:33], off offset:112 ; GFX950-NEXT: scratch_store_dwordx4 v0, v[26:29], off offset:96 ; GFX950-NEXT: scratch_store_dwordx4 v0, v[22:25], off offset:80 @@ -9211,12 +9380,11 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX950-NEXT: scratch_store_dwordx4 v0, v[10:13], off offset:32 ; GFX950-NEXT: scratch_store_dwordx4 v0, v[6:9], off offset:16 ; GFX950-NEXT: scratch_store_dwordx4 v0, v[2:5], off -; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse ; GFX950-NEXT: 
v_accvgpr_read_b32 v46, a6 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse @@ -33878,34 +34046,34 @@ define <2 x i64> @v_fptosi_v2bf16_to_v2i64(<2 x bfloat> %x) { ; GFX950-LABEL: v_fptosi_v2bf16_to_v2i64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v0 +; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v0 ; GFX950-NEXT: v_trunc_f32_e32 v1, v1 ; GFX950-NEXT: s_mov_b32 s0, 0x2f800000 ; GFX950-NEXT: v_mul_f32_e64 v2, |v1|, s0 -; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GFX950-NEXT: v_floor_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s1, 0xcf800000 -; GFX950-NEXT: v_trunc_f32_e32 v4, v0 ; GFX950-NEXT: v_fma_f32 v3, v2, s1, |v1| -; GFX950-NEXT: v_mul_f32_e64 v0, |v4|, s0 -; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX950-NEXT: v_floor_f32_e32 v0, v0 ; GFX950-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GFX950-NEXT: v_fma_f32 v5, v0, s1, |v4| -; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v1 -; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v0 -; GFX950-NEXT: v_xor_b32_e32 v3, v3, v1 -; GFX950-NEXT: v_xor_b32_e32 v2, v2, v1 -; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v3, v1 -; GFX950-NEXT: v_ashrrev_i32_e32 v3, 31, v4 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc -; GFX950-NEXT: v_xor_b32_e32 v2, v5, v3 -; GFX950-NEXT: v_xor_b32_e32 v4, v6, v3 -; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3 +; GFX950-NEXT: v_trunc_f32_e32 v0, v0 +; GFX950-NEXT: v_xor_b32_e32 v4, v2, v1 +; GFX950-NEXT: v_xor_b32_e32 v2, v3, v1 +; GFX950-NEXT: v_mul_f32_e64 v3, |v0|, s0 +; GFX950-NEXT: v_floor_f32_e32 v3, v3 +; GFX950-NEXT: v_fma_f32 v5, v3, s1, |v0| +; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v3 +; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v1 +; GFX950-NEXT: s_nop 1 +; 
GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v1, vcc +; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX950-NEXT: v_xor_b32_e32 v0, v5, v1 +; GFX950-NEXT: v_xor_b32_e32 v4, v6, v1 +; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v1 ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc +; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fptosi_v2bf16_to_v2i64: @@ -34161,48 +34329,47 @@ define <3 x i64> @v_fptosi_v3bf16_to_v3i64(<3 x bfloat> %x) { ; GFX950-LABEL: v_fptosi_v3bf16_to_v3i64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0 -; GFX950-NEXT: v_trunc_f32_e32 v2, v2 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX950-NEXT: v_trunc_f32_e32 v1, v1 ; GFX950-NEXT: s_mov_b32 s0, 0x2f800000 -; GFX950-NEXT: v_mul_f32_e64 v3, |v2|, s0 -; GFX950-NEXT: v_floor_f32_e32 v3, v3 +; GFX950-NEXT: v_mul_f32_e64 v2, |v1|, s0 +; GFX950-NEXT: v_floor_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s1, 0xcf800000 -; GFX950-NEXT: v_fma_f32 v4, v3, s1, |v2| -; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 -; GFX950-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GFX950-NEXT: v_trunc_f32_e32 v5, v0 +; GFX950-NEXT: v_fma_f32 v3, v2, s1, |v1| +; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 ; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX950-NEXT: v_mul_f32_e64 v0, |v5|, s0 -; GFX950-NEXT: v_floor_f32_e32 v0, v0 -; GFX950-NEXT: v_ashrrev_i32_e32 v2, 31, v2 -; GFX950-NEXT: v_fma_f32 v6, v0, s1, |v5| -; GFX950-NEXT: v_xor_b32_e32 v4, v4, v2 -; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v6 -; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX950-NEXT: v_xor_b32_e32 v3, v3, v2 -; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v0 -; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2 -; GFX950-NEXT: v_trunc_f32_e32 v1, v1 -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_subb_co_u32_e32 v6, vcc, v3, v2, vcc -; GFX950-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX950-NEXT: v_mul_f32_e64 v5, |v1|, s0 -; 
GFX950-NEXT: v_floor_f32_e32 v5, v5 -; GFX950-NEXT: v_xor_b32_e32 v2, v7, v3 -; GFX950-NEXT: v_fma_f32 v7, v5, s1, |v1| -; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GFX950-NEXT: v_xor_b32_e32 v4, v8, v3 -; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3 +; GFX950-NEXT: v_trunc_f32_e32 v6, v4 +; GFX950-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GFX950-NEXT: v_mul_f32_e64 v4, |v6|, s0 +; GFX950-NEXT: v_floor_f32_e32 v4, v4 ; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v1 +; GFX950-NEXT: v_fma_f32 v5, v4, s1, |v6| +; GFX950-NEXT: v_xor_b32_e32 v3, v3, v1 +; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v5 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_xor_b32_e32 v2, v2, v1 +; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v4 +; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v3, v1 +; GFX950-NEXT: v_trunc_f32_e32 v0, v0 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc -; GFX950-NEXT: v_xor_b32_e32 v4, v7, v1 -; GFX950-NEXT: v_xor_b32_e32 v5, v5, v1 -; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v1 +; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v2, v1, vcc +; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v6 +; GFX950-NEXT: v_mul_f32_e64 v6, |v0|, s0 +; GFX950-NEXT: v_floor_f32_e32 v6, v6 +; GFX950-NEXT: v_xor_b32_e32 v2, v7, v1 +; GFX950-NEXT: v_fma_f32 v7, v6, s1, |v0| +; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GFX950-NEXT: v_xor_b32_e32 v3, v8, v1 +; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v1 +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v1, vcc +; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX950-NEXT: v_xor_b32_e32 v0, v7, v1 +; GFX950-NEXT: v_xor_b32_e32 v6, v6, v1 +; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v1 ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v1, vcc -; GFX950-NEXT: v_mov_b32_e32 v1, v6 +; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fptosi_v3bf16_to_v3i64: @@ 
-34531,60 +34698,59 @@ define <4 x i64> @v_fptosi_v4bf16_to_v4i64(<4 x bfloat> %x) { ; GFX950-LABEL: v_fptosi_v4bf16_to_v4i64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_lshlrev_b32_e32 v2, 16, v0 +; GFX950-NEXT: v_and_b32_e32 v2, 0xffff0000, v1 ; GFX950-NEXT: v_trunc_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s0, 0x2f800000 ; GFX950-NEXT: v_mul_f32_e64 v3, |v2|, s0 +; GFX950-NEXT: v_lshlrev_b32_e32 v1, 16, v1 ; GFX950-NEXT: v_floor_f32_e32 v3, v3 ; GFX950-NEXT: s_mov_b32 s1, 0xcf800000 +; GFX950-NEXT: v_trunc_f32_e32 v1, v1 ; GFX950-NEXT: v_fma_f32 v4, v3, s1, |v2| -; GFX950-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GFX950-NEXT: v_mul_f32_e64 v5, |v1|, s0 ; GFX950-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GFX950-NEXT: v_trunc_f32_e32 v5, v0 +; GFX950-NEXT: v_floor_f32_e32 v5, v5 ; GFX950-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GFX950-NEXT: v_mul_f32_e64 v0, |v5|, s0 -; GFX950-NEXT: v_floor_f32_e32 v0, v0 +; GFX950-NEXT: v_fma_f32 v6, v5, s1, |v1| +; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v6 ; GFX950-NEXT: v_ashrrev_i32_e32 v2, 31, v2 -; GFX950-NEXT: v_fma_f32 v6, v0, s1, |v5| ; GFX950-NEXT: v_xor_b32_e32 v4, v4, v2 -; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX950-NEXT: v_xor_b32_e32 v3, v3, v2 -; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v0 -; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2 -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_subb_co_u32_e32 v8, vcc, v3, v2, vcc -; GFX950-NEXT: v_ashrrev_i32_e32 v3, 31, v5 -; GFX950-NEXT: v_lshlrev_b32_e32 v5, 16, v1 -; GFX950-NEXT: v_trunc_f32_e32 v5, v5 -; GFX950-NEXT: v_xor_b32_e32 v2, v6, v3 -; GFX950-NEXT: v_mul_f32_e64 v6, |v5|, s0 -; GFX950-NEXT: v_floor_f32_e32 v6, v6 -; GFX950-NEXT: v_xor_b32_e32 v4, v7, v3 -; GFX950-NEXT: v_fma_f32 v7, v6, s1, |v5| -; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX950-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 -; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3 -; GFX950-NEXT: v_ashrrev_i32_e32 v5, 31, v5 -; GFX950-NEXT: v_trunc_f32_e32 v1, v1 -; GFX950-NEXT: 
v_subb_co_u32_e32 v3, vcc, v4, v3, vcc -; GFX950-NEXT: v_xor_b32_e32 v4, v7, v5 -; GFX950-NEXT: v_mul_f32_e64 v7, |v1|, s0 -; GFX950-NEXT: v_floor_f32_e32 v7, v7 -; GFX950-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX950-NEXT: v_fma_f32 v9, v7, s1, |v1| -; GFX950-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GFX950-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GFX950-NEXT: v_xor_b32_e32 v6, v6, v5 -; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v5 +; GFX950-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX950-NEXT: v_sub_co_u32_e32 v6, vcc, v4, v2 ; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v1 +; GFX950-NEXT: v_and_b32_e32 v4, 0xffff0000, v0 +; GFX950-NEXT: v_subb_co_u32_e32 v7, vcc, v3, v2, vcc +; GFX950-NEXT: v_xor_b32_e32 v3, v8, v1 +; GFX950-NEXT: v_trunc_f32_e32 v8, v4 +; GFX950-NEXT: v_mul_f32_e64 v4, |v8|, s0 +; GFX950-NEXT: v_floor_f32_e32 v4, v4 +; GFX950-NEXT: v_xor_b32_e32 v2, v5, v1 +; GFX950-NEXT: v_fma_f32 v5, v4, s1, |v8| +; GFX950-NEXT: v_cvt_u32_f32_e32 v9, v5 +; GFX950-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX950-NEXT: v_cvt_u32_f32_e32 v10, v4 +; GFX950-NEXT: v_sub_co_u32_e32 v4, vcc, v3, v1 +; GFX950-NEXT: v_trunc_f32_e32 v0, v0 ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc -; GFX950-NEXT: v_xor_b32_e32 v6, v9, v1 -; GFX950-NEXT: v_xor_b32_e32 v7, v7, v1 -; GFX950-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v1 +; GFX950-NEXT: v_subb_co_u32_e32 v5, vcc, v2, v1, vcc +; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v8 +; GFX950-NEXT: v_mul_f32_e64 v8, |v0|, s0 +; GFX950-NEXT: v_floor_f32_e32 v8, v8 +; GFX950-NEXT: v_xor_b32_e32 v2, v9, v1 +; GFX950-NEXT: v_fma_f32 v9, v8, s1, |v0| +; GFX950-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX950-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GFX950-NEXT: v_xor_b32_e32 v3, v10, v1 +; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v1 +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v1, vcc +; GFX950-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX950-NEXT: v_xor_b32_e32 v0, v9, v1 +; GFX950-NEXT: v_xor_b32_e32 v8, v8, v1 +; GFX950-NEXT: 
v_sub_co_u32_e32 v0, vcc, v0, v1 ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v1, vcc -; GFX950-NEXT: v_mov_b32_e32 v1, v8 +; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v1, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fptosi_v4bf16_to_v4i64: diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll index b08e9c439a9fe..74ae44d5210e3 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck 
-check-prefixes=GFX90A_GFX942,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { @@ -167,25 +167,41 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; 
GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void } @@ -212,26 +228,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, 
[[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -258,26 +291,43 @@ define 
amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: 
S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; 
GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -306,28 +356,47 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908_GFX11-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; 
GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + 
; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed 
[[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll index b80aa9324e616..de41cfabc5fd4 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm 
-stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { @@ -153,26 +153,43 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32> } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = 
BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn + ; GFX942: 
bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn ; GFX11: bb.0 (%ir-block.0): @@ -199,27 +216,45 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; 
GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed 
[[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -247,27 +282,45 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY 
[[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 
= BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn ; GFX11: bb.0 (%ir-block.0): @@ -295,29 +348,49 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add } define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg 
%soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY 
[[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; 
GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ; ; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn ; GFX11: bb.0 (%ir-block.0): diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll index 2ce54f8a463c7..1d993b68a1ae9 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn @@ -207,251 +207,655 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32 } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY 
$sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = 
REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed 
[[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY 
[[REG_SEQUENCE3]] + ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], 
%subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; 
GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn - ; GFX90A_GFX942: 
bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: 
liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: 
[[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFx90A-NEXT: 
[[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: 
[[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; 
GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = 
COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFx90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: S_ENDPGM 0 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY 
$sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec - ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY 
[[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec + ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: 
[[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec + ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + 
; GFx90A-NEXT: [[COPY11:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec + ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec + ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec - ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, 
$sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec + ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec + ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec + ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] 
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec + ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec + ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, 
[[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec - ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; 
GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec + ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec + ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec + ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = 
BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec + ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec + ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret double %ret } define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], 
%subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec - ; GFX90A_GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 - ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec - ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; GFX90A-LABEL: name: 
buffer_ptr_atomic_fadd_f64_bothen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, 
align 1, addrspace 8) + ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec + ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec + ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: 
[[COPY9:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec + ; GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec + ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 + ; + ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn + ; GFx90A: bb.0 (%ir-block.0): + ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4 + ; GFx90A-NEXT: {{ $}} + ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], 
%subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3 + ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GFx90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:av_64_align2 = COPY [[REG_SEQUENCE3]] + ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) + ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec + ; GFx90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 + ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec + ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll index c30b5549776ea..b051674a915ca 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll +++ 
b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn @@ -165,25 +165,41 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn - ; GFX90A_GFX942: bb.0 
(%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; 
GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed 
[[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0) ret void } @@ -210,26 +226,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, 
killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn + ; GFX942: bb.0 (%ir-block.0): 
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void } @@ -256,26 +289,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: 
[[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], 
%subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: 
(volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void } @@ -304,28 +354,47 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) ; GFX908-NEXT: S_ENDPGM 0 ; - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: S_ENDPGM 0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE 
[[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: S_ENDPGM 0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll 
b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll index 2abd7edade8a1..9a7ba94c3c083 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn @@ -86,106 +86,179 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; 
GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 
= COPY $sgpr0 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed 
[[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY 
[[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: 
[[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr 
addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn + ; GFX90A: bb.0 (%ir-block.0): 
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: 
[[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret <2 x half> %ret } define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { - ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn - ; GFX90A_GFX942: bb.0 (%ir-block.0): - ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 - ; GFX90A_GFX942-NEXT: {{ $}} - ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 - ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY 
$sgpr0 - ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 - ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) - ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] - ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn + ; GFX90A: bb.0 (%ir-block.0): + ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; 
GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3 + ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0 + ; + ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn + ; GFX942: bb.0 (%ir-block.0): + ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4 + ; GFX942-NEXT: {{ $}} + ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4 + ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3 + ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX942-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 + ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3 + ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) + ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]] + ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll index ddd3b1520bf5e..67924d1b42d56 100644 --- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll @@ -2182,32 +2182,36 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) { ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX1250-NEXT: v_mov_b64_e32 v[4:5], 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_lshr_b32 s6, s1, 16 -; GFX1250-NEXT: s_lshr_b32 s7, s1, 24 ; GFX1250-NEXT: s_lshr_b32 s8, s2, 16 ; GFX1250-NEXT: s_lshr_b32 s9, s2, 24 -; GFX1250-NEXT: s_lshr_b32 s10, s3, 16 -; GFX1250-NEXT: s_lshr_b32 s11, s3, 24 +; GFX1250-NEXT: s_bfe_u32 s14, s2, 0x80008 +; GFX1250-NEXT: s_add_co_i32 s2, s2, s2 +; GFX1250-NEXT: s_add_co_i32 s14, s14, s14 +; GFX1250-NEXT: s_add_co_i32 s9, s9, s9 +; GFX1250-NEXT: s_add_co_i32 s8, s8, s8 +; 
GFX1250-NEXT: s_lshr_b32 s6, s1, 16 +; GFX1250-NEXT: s_lshr_b32 s7, s1, 24 +; GFX1250-NEXT: s_and_b32 s2, s2, 0xff +; GFX1250-NEXT: s_lshl_b32 s14, s14, 8 +; GFX1250-NEXT: s_lshl_b32 s9, s9, 8 +; GFX1250-NEXT: s_and_b32 s8, s8, 0xff ; GFX1250-NEXT: s_lshr_b32 s4, s0, 16 ; GFX1250-NEXT: s_lshr_b32 s5, s0, 24 +; GFX1250-NEXT: s_lshr_b32 s10, s3, 16 +; GFX1250-NEXT: s_lshr_b32 s11, s3, 24 ; GFX1250-NEXT: s_bfe_u32 s12, s0, 0x80008 ; GFX1250-NEXT: s_bfe_u32 s13, s1, 0x80008 -; GFX1250-NEXT: s_bfe_u32 s14, s2, 0x80008 ; GFX1250-NEXT: s_bfe_u32 s15, s3, 0x80008 -; GFX1250-NEXT: s_add_co_i32 s11, s11, s11 -; GFX1250-NEXT: s_add_co_i32 s10, s10, s10 -; GFX1250-NEXT: s_add_co_i32 s9, s9, s9 -; GFX1250-NEXT: s_add_co_i32 s8, s8, s8 +; GFX1250-NEXT: s_or_b32 s2, s2, s14 +; GFX1250-NEXT: s_or_b32 s8, s8, s9 ; GFX1250-NEXT: s_add_co_i32 s7, s7, s7 ; GFX1250-NEXT: s_add_co_i32 s6, s6, s6 ; GFX1250-NEXT: s_add_co_i32 s3, s3, s3 -; GFX1250-NEXT: s_add_co_i32 s2, s2, s2 ; GFX1250-NEXT: s_add_co_i32 s15, s15, s15 -; GFX1250-NEXT: s_add_co_i32 s14, s14, s14 -; GFX1250-NEXT: s_lshl_b32 s11, s11, 8 -; GFX1250-NEXT: s_and_b32 s10, s10, 0xff -; GFX1250-NEXT: s_lshl_b32 s9, s9, 8 -; GFX1250-NEXT: s_and_b32 s8, s8, 0xff +; GFX1250-NEXT: s_add_co_i32 s11, s11, s11 +; GFX1250-NEXT: s_add_co_i32 s10, s10, s10 +; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff +; GFX1250-NEXT: s_lshl_b32 s8, s8, 16 ; GFX1250-NEXT: s_add_co_i32 s1, s1, s1 ; GFX1250-NEXT: s_add_co_i32 s13, s13, s13 ; GFX1250-NEXT: s_lshl_b32 s7, s7, 8 @@ -2217,38 +2221,33 @@ define amdgpu_kernel void @amd_kernel_v16i8(<16 x i8> %arg0) { ; GFX1250-NEXT: s_add_co_i32 s5, s5, s5 ; GFX1250-NEXT: s_add_co_i32 s4, s4, s4 ; GFX1250-NEXT: s_and_b32 s3, s3, 0xff -; GFX1250-NEXT: s_and_b32 s2, s2, 0xff ; GFX1250-NEXT: s_lshl_b32 s15, s15, 8 -; GFX1250-NEXT: s_or_b32 s10, s10, s11 -; GFX1250-NEXT: s_lshl_b32 s11, s14, 8 -; GFX1250-NEXT: s_or_b32 s8, s8, s9 +; GFX1250-NEXT: s_lshl_b32 s11, s11, 8 +; GFX1250-NEXT: s_and_b32 s10, s10, 0xff +; 
GFX1250-NEXT: s_or_b32 s2, s2, s8 ; GFX1250-NEXT: s_and_b32 s1, s1, 0xff -; GFX1250-NEXT: s_lshl_b32 s9, s13, 8 +; GFX1250-NEXT: s_lshl_b32 s8, s13, 8 ; GFX1250-NEXT: s_or_b32 s6, s6, s7 ; GFX1250-NEXT: s_and_b32 s0, s0, 0xff ; GFX1250-NEXT: s_lshl_b32 s7, s12, 8 ; GFX1250-NEXT: s_lshl_b32 s5, s5, 8 ; GFX1250-NEXT: s_and_b32 s4, s4, 0xff ; GFX1250-NEXT: s_or_b32 s3, s3, s15 -; GFX1250-NEXT: s_or_b32 s2, s2, s11 -; GFX1250-NEXT: s_or_b32 s1, s1, s9 +; GFX1250-NEXT: s_or_b32 s10, s10, s11 +; GFX1250-NEXT: s_or_b32 s1, s1, s8 ; GFX1250-NEXT: s_or_b32 s0, s0, s7 ; GFX1250-NEXT: s_or_b32 s4, s4, s5 ; GFX1250-NEXT: s_and_b32 s3, s3, 0xffff ; GFX1250-NEXT: s_lshl_b32 s10, s10, 16 -; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff -; GFX1250-NEXT: s_lshl_b32 s8, s8, 16 ; GFX1250-NEXT: s_and_b32 s1, s1, 0xffff +; GFX1250-NEXT: s_lshl_b32 s6, s6, 16 ; GFX1250-NEXT: s_and_b32 s0, s0, 0xffff ; GFX1250-NEXT: s_lshl_b32 s4, s4, 16 -; GFX1250-NEXT: s_lshl_b32 s5, s6, 16 ; GFX1250-NEXT: s_or_b32 s3, s3, s10 -; GFX1250-NEXT: s_or_b32 s2, s2, s8 +; GFX1250-NEXT: s_or_b32 s1, s1, s6 ; GFX1250-NEXT: s_or_b32 s0, s0, s4 -; GFX1250-NEXT: s_or_b32 s1, s1, s5 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX1250-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX1250-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 4eaa1965c66f1..54b301c960df7 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -110,10 +110,8 @@ define amdgpu_kernel void @zero_init_kernel() { ; ; GFX942-LABEL: zero_init_kernel: ; GFX942: ; %bb.0: -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; 
GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:48 @@ -304,10 +302,8 @@ define void @zero_init_foo() { ; GFX942-LABEL: zero_init_foo: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48 @@ -1180,10 +1176,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX942: ; %bb.0: ; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256 @@ -1397,10 +1391,8 @@ define void @zero_init_small_offset_foo() { ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: scratch_load_dword v0, off, s32 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256 @@ -2420,10 +2412,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX942: ; %bb.0: ; GFX942-NEXT: 
scratch_load_dword v0, off, off offset:4 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-NEXT: s_movk_i32 s0, 0x4004 @@ -2656,10 +2646,8 @@ define void @zero_init_large_offset_foo() { ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: scratch_load_dword v0, off, s32 offset:4 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-NEXT: s_add_i32 s0, s32, 0x4004 @@ -5021,10 +5009,8 @@ define amdgpu_ps void @large_offset() { ; ; GFX942-LABEL: large_offset: ; GFX942: ; %bb.0: ; %bb -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1 diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll index 069a47ec97bfe..20e7a15c829a9 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll @@ -4010,8 +4010,8 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double ; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5] ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; 
GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.maximum.f64(double %a, double %b) %max1 = call double @llvm.maximum.f64(double %max0, double %c) diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll index d8746b58b16b7..8aebd6f719040 100644 --- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll +++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll @@ -4010,8 +4010,8 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double ; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5] ; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] ; GFX9-NEXT: s_nop 1 -; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] %max0 = call double @llvm.minimum.f64(double %a, double %b) %max1 = call double @llvm.minimum.f64(double %max0, double %c) diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll index e532deaca98a8..8a5a50c5070e7 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll @@ -288,15 +288,15 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0 ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[50:51], v[2:3], v[2:3] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[48:49], v[0:1], v[0:1] -; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[14:15], 0xc8, v[14:15] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[16:17], v[16:17], v[16:17] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[24:25], 0x64, v[24:25] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[22:23], v[22:23], v[22:23] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21] +; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[28:29], v[28:29], v[28:29] ; 
GCN-SDAG-NEXT: v_add_nc_u64_e32 v[26:27], v[26:27], v[26:27] ; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[30:31], v[30:31], v[30:31] -; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[20:21], v[20:21], v[20:21] -; GCN-SDAG-NEXT: v_add_nc_u64_e32 v[18:19], v[18:19], v[18:19] ; GCN-SDAG-NEXT: s_clause 0x1 ; GCN-SDAG-NEXT: global_store_b128 v[52:53], v[34:37], off ; GCN-SDAG-NEXT: global_store_b128 v[54:55], v[0:3], off diff --git a/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll index 75c5d206e7933..a30afd000b6d4 100644 --- a/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll +++ b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll @@ -82,10 +82,8 @@ define <3 x float> @extract_subvector_v3f32_v33f32_elt30_1(ptr addrspace(1) %ptr ; GFX942-NEXT: global_load_dwordx4 v[4:7], v[0:1], off ; GFX942-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:112 ; GFX942-NEXT: global_load_dword v2, v[0:1], off offset:128 -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(2) ; GFX942-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 ; GFX942-NEXT: s_waitcnt vmcnt(2) @@ -125,10 +123,8 @@ define <6 x float> @extract_subvector_v6f32_v36f32_elt30(ptr addrspace(1) %ptr) ; GFX942-NEXT: global_load_dwordx4 v[6:9], v[0:1], off ; GFX942-NEXT: global_load_dwordx4 v[10:13], v[0:1], off offset:112 ; GFX942-NEXT: global_load_dwordx4 v[2:5], v[0:1], off offset:128 -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(2) ; GFX942-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 ; GFX942-NEXT: s_waitcnt vmcnt(2) @@ -165,10 +161,8 @@ 
define <3 x float> @issue153808_vector_extract_assert(ptr addrspace(1) %ptr) #0 ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: global_load_dwordx4 v[6:9], v[0:1], off ; GFX942-NEXT: global_load_dwordx3 v[2:4], v[0:1], off offset:192 -; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s1, s0 -; GFX942-NEXT: s_mov_b32 s2, s0 -; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_mov_b64 s[0:1], 0 +; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(1) ; GFX942-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 ; GFX942-NEXT: s_waitcnt vmcnt(1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll index 5ab8706f28f5f..7edb30d1fbd57 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -74,10 +74,9 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-LABEL: test_mfma_f32_32x32x4bf16_1k: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 -; GFX942-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, 1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: v_mov_b32_e32 v0, 2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0 ; GFX942-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40 @@ -115,17 +114,17 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-NEXT: v_accvgpr_write_b32 a30, s14 ; GFX942-NEXT: v_accvgpr_write_b32 a31, s15 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[2:3], v[0:1], a[0:31] cbsz:1 abid:2 blgp:3 +; GFX942-NEXT: v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3 ; GFX942-NEXT: s_nop 15 ; GFX942-NEXT: s_nop 2 -; GFX942-NEXT: global_store_dwordx4 v1, 
a[24:27], s[34:35] offset:96 -; GFX942-NEXT: global_store_dwordx4 v1, a[28:31], s[34:35] offset:112 -; GFX942-NEXT: global_store_dwordx4 v1, a[16:19], s[34:35] offset:64 -; GFX942-NEXT: global_store_dwordx4 v1, a[20:23], s[34:35] offset:80 -; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[34:35] offset:32 -; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[34:35] offset:48 -; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[34:35] -; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[34:35] offset:16 +; GFX942-NEXT: global_store_dwordx4 v4, a[24:27], s[34:35] offset:96 +; GFX942-NEXT: global_store_dwordx4 v4, a[28:31], s[34:35] offset:112 +; GFX942-NEXT: global_store_dwordx4 v4, a[16:19], s[34:35] offset:64 +; GFX942-NEXT: global_store_dwordx4 v4, a[20:23], s[34:35] offset:80 +; GFX942-NEXT: global_store_dwordx4 v4, a[8:11], s[34:35] offset:32 +; GFX942-NEXT: global_store_dwordx4 v4, a[12:15], s[34:35] offset:48 +; GFX942-NEXT: global_store_dwordx4 v4, a[0:3], s[34:35] +; GFX942-NEXT: global_store_dwordx4 v4, a[4:7], s[34:35] offset:16 ; GFX942-NEXT: s_endpgm ; ; GFX90A-VGPR-LABEL: test_mfma_f32_32x32x4bf16_1k: @@ -188,10 +187,9 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x4bf16_1k: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v34, 1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v35, v33 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 2 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[32:33], 1 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[34:35], 2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v36, 0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x0 ; GFX942-VGPR-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x40 @@ -229,17 +227,17 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-VGPR-NEXT: v_mov_b32_e32 v30, s14 ; 
GFX942-VGPR-NEXT: v_mov_b32_e32 v31, s15 ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[34:35], v[32:33], v[0:31] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[32:33], v[34:35], v[0:31] cbsz:1 abid:2 blgp:3 ; GFX942-VGPR-NEXT: s_nop 15 ; GFX942-VGPR-NEXT: s_nop 2 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[24:27], s[34:35] offset:96 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[28:31], s[34:35] offset:112 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[16:19], s[34:35] offset:64 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[20:23], s[34:35] offset:80 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[8:11], s[34:35] offset:32 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[12:15], s[34:35] offset:48 -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[0:3], s[34:35] -; GFX942-VGPR-NEXT: global_store_dwordx4 v33, v[4:7], s[34:35] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[24:27], s[34:35] offset:96 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[28:31], s[34:35] offset:112 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[16:19], s[34:35] offset:64 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[20:23], s[34:35] offset:80 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[8:11], s[34:35] offset:32 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[12:15], s[34:35] offset:48 +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[0:3], s[34:35] +; GFX942-VGPR-NEXT: global_store_dwordx4 v36, v[4:7], s[34:35] offset:16 ; GFX942-VGPR-NEXT: s_endpgm bb: %in.1 = load <32 x float>, ptr addrspace(1) %arg @@ -289,10 +287,8 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-LABEL: test_mfma_f32_16x16x4bf16_1k: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 -; GFX942-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, 1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: v_mov_b32_e32 v0, 2 +; GFX942-NEXT: 
v_mov_b64_e32 v[0:1], 1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) @@ -313,12 +309,13 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-NEXT: v_accvgpr_write_b32 a14, s14 ; GFX942-NEXT: v_accvgpr_write_b32 a15, s15 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3 -; GFX942-NEXT: s_nop 10 -; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[16:17] offset:48 -; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[16:17] offset:32 -; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[16:17] offset:16 -; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[16:17] +; GFX942-NEXT: v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_nop 9 +; GFX942-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 +; GFX942-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 +; GFX942-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 +; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] ; GFX942-NEXT: s_endpgm ; ; GFX90A-VGPR-LABEL: test_mfma_f32_16x16x4bf16_1k: @@ -351,10 +348,8 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-VGPR-LABEL: test_mfma_f32_16x16x4bf16_1k: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v17 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 2 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], 1 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], 2 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) @@ -367,12 +362,13 @@ define amdgpu_kernel void 
@test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) # ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[12:13] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[18:19], v[16:17], v[0:15] cbsz:1 abid:2 blgp:3 -; GFX942-VGPR-NEXT: s_nop 10 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[12:15], s[16:17] offset:48 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[8:11], s[16:17] offset:32 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[4:7], s[16:17] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[0:3], s[16:17] +; GFX942-VGPR-NEXT: v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-VGPR-NEXT: s_nop 9 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] ; GFX942-VGPR-NEXT: s_endpgm bb: %in.1 = load <16 x float>, ptr addrspace(1) %arg @@ -407,10 +403,9 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 ; GFX942-LABEL: test_mfma_f32_4x4x4bf16_1k: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; GFX942-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, 1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: v_mov_b32_e32 v0, 2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) @@ -419,9 +414,9 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 ; GFX942-NEXT: v_accvgpr_write_b32 a2, s2 ; GFX942-NEXT: v_accvgpr_write_b32 a3, s3 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: 
v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[2:3], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-NEXT: v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 ; GFX942-NEXT: s_nop 4 -; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[6:7] +; GFX942-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7] ; GFX942-NEXT: s_endpgm ; ; GFX90A-VGPR-LABEL: test_mfma_f32_4x4x4bf16_1k: @@ -445,19 +440,18 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 ; GFX942-VGPR-LABEL: test_mfma_f32_4x4x4bf16_1k: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, 1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v5 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, 2 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], 1 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], 2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[6:7], v[4:5], v[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3 ; GFX942-VGPR-NEXT: s_nop 4 -; GFX942-VGPR-NEXT: global_store_dwordx4 v5, v[0:3], s[6:7] +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7] ; GFX942-VGPR-NEXT: s_endpgm bb: %in.1 = load <4 x float>, ptr addrspace(1) %arg @@ -508,10 +502,8 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) # ; GFX942-LABEL: test_mfma_f32_32x32x8bf16_1k: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 -; GFX942-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, 1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: v_mov_b32_e32 v0, 2 +; GFX942-NEXT: 
v_mov_b64_e32 v[0:1], 1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) @@ -532,12 +524,13 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) # ; GFX942-NEXT: v_accvgpr_write_b32 a14, s14 ; GFX942-NEXT: v_accvgpr_write_b32 a15, s15 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: v_mfma_f32_32x32x8_bf16 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3 -; GFX942-NEXT: s_nop 10 -; GFX942-NEXT: global_store_dwordx4 v1, a[12:15], s[16:17] offset:48 -; GFX942-NEXT: global_store_dwordx4 v1, a[8:11], s[16:17] offset:32 -; GFX942-NEXT: global_store_dwordx4 v1, a[4:7], s[16:17] offset:16 -; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[16:17] +; GFX942-NEXT: v_mfma_f32_32x32x8_bf16 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: s_nop 9 +; GFX942-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 +; GFX942-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 +; GFX942-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 +; GFX942-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] ; GFX942-NEXT: s_endpgm ; ; GFX90A-VGPR-LABEL: test_mfma_f32_32x32x8bf16_1k: @@ -571,10 +564,8 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) # ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8bf16_1k: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v17 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 2 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], 1 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], 2 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) @@ -587,12 +578,13 @@ define amdgpu_kernel void 
@test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) # ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[12:13] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], s[14:15] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_bf16 v[0:15], v[18:19], v[16:17], v[0:15] cbsz:1 abid:2 blgp:3 -; GFX942-VGPR-NEXT: s_nop 10 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[12:15], s[16:17] offset:48 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[8:11], s[16:17] offset:32 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[4:7], s[16:17] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v17, v[0:3], s[16:17] +; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_bf16 v[0:15], v[16:17], v[18:19], v[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 +; GFX942-VGPR-NEXT: s_nop 9 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[16:17] offset:48 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[16:17] offset:32 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[16:17] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[0:3], s[16:17] ; GFX942-VGPR-NEXT: s_endpgm bb: %in.1 = load <16 x float>, ptr addrspace(1) %arg @@ -627,10 +619,9 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) ; GFX942-LABEL: test_mfma_f32_16x16x16bf16_1k: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; GFX942-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, 1 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-NEXT: v_mov_b32_e32 v0, 2 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 2 +; GFX942-NEXT: v_mov_b32_e32 v4, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) @@ -639,9 +630,9 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) ; GFX942-NEXT: v_accvgpr_write_b32 a2, s2 ; GFX942-NEXT: v_accvgpr_write_b32 a3, s3 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: 
v_mfma_f32_16x16x16_bf16 a[0:3], v[2:3], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-NEXT: v_mfma_f32_16x16x16_bf16 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 ; GFX942-NEXT: s_nop 6 -; GFX942-NEXT: global_store_dwordx4 v1, a[0:3], s[6:7] +; GFX942-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7] ; GFX942-NEXT: s_endpgm ; ; GFX90A-VGPR-LABEL: test_mfma_f32_16x16x16bf16_1k: @@ -665,19 +656,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) ; GFX942-VGPR-LABEL: test_mfma_f32_16x16x16bf16_1k: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, 1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v5 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, 2 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], 1 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], 2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f32_16x16x16_bf16 v[0:3], v[6:7], v[4:5], v[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-VGPR-NEXT: v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] cbsz:1 abid:2 blgp:3 ; GFX942-VGPR-NEXT: s_nop 6 -; GFX942-VGPR-NEXT: global_store_dwordx4 v5, v[0:3], s[6:7] +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7] ; GFX942-VGPR-NEXT: s_endpgm bb: %in.1 = load <4 x float>, ptr addrspace(1) %arg @@ -1273,14 +1263,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bit ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-NEXT: v_accvgpr_write_b32 a0, 0 ; GFX942-NEXT: v_accvgpr_write_b32 a1, 64 -; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_mov_b64_e32 
v[0:1], s[2:3] -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0 ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] ; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] @@ -1326,26 +1316,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bit ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 64 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], s[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11], v[12:13], v[2:9] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; 
GFX942-VGPR-NEXT: s_nop 15 -; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: s_nop 0 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> splat (double bitcast (i64 274877906944 to double)), i32 0, i32 0, i32 0) @@ -1386,14 +1370,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-NEXT: v_accvgpr_write_b32 a0, 64 ; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a7, a0 ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] ; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] @@ -1433,18 +1417,16 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and ; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 64 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-VGPR-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34 +; GFX942-VGPR-NEXT: s_mov_b32 s6, 64 +; GFX942-VGPR-NEXT: s_mov_b32 
s7, s6 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[6:7] ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3] -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[8:9] ; GFX942-VGPR-NEXT: s_nop 1 ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] @@ -1493,14 +1475,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_ ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-NEXT: v_accvgpr_write_b32 a0, 1.0 ; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[2:3] -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0 +; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a7, a0 ; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[6:7] ; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] @@ -1540,18 +1522,16 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_ ; GFX942-VGPR-LABEL: test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low: ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], 
s[4:5], 0x34 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 +; GFX942-VGPR-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34 +; GFX942-VGPR-NEXT: s_mov_b32 s6, 1.0 +; GFX942-VGPR-NEXT: s_mov_b32 s7, s6 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[6:7] ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[2:3] -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v0 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[8:9] ; GFX942-VGPR-NEXT: s_nop 1 ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9], v[10:11], v[0:7] cbsz:1 abid:2 neg:[1,1,0] @@ -1600,17 +1580,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-NEXT: v_accvgpr_write_b32 a0, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0x3ff00000 -; GFX942-NEXT: v_accvgpr_write_b32 a7, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a1, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 1.0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_mov_b32_e32 v2, s2 ; GFX942-NEXT: v_mov_b32_e32 v3, s3 -; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0 +; GFX942-NEXT: v_accvgpr_write_b32 a7, v1 +; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1 +; GFX942-NEXT: v_accvgpr_write_b32 a6, v0 ; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0 ; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0 -; GFX942-NEXT: 
v_accvgpr_mov_b32 a6, a0 ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] ; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7] @@ -1653,28 +1633,21 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d ; GFX942-VGPR: ; %bb.0: ; %bb ; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], 1.0 ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_nop 15 -; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: s_nop 0 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: 
global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) @@ -1711,20 +1684,27 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) % ; ; GFX942-LABEL: test_mfma_f64_16x16x4f64_splat_lit: ; GFX942: ; %bb.0: ; %bb +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000 +; GFX942-NEXT: v_accvgpr_write_b32 a0, v0 ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 -; GFX942-NEXT: v_mov_b32_e32 v0, 0x405ec000 -; GFX942-NEXT: v_accvgpr_write_b32 a0, 0 -; GFX942-NEXT: v_accvgpr_write_b32 a1, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a1, v1 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000 +; GFX942-NEXT: v_accvgpr_write_b32 a3, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a2, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: v_mov_b32_e32 v2, s2 ; GFX942-NEXT: v_mov_b32_e32 v3, s3 -; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1 +; GFX942-NEXT: v_accvgpr_write_b32 a5, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a4, v0 +; GFX942-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-NEXT: v_mov_b32_e32 v1, 0x405ec000 +; GFX942-NEXT: v_accvgpr_write_b32 a7, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a6, v0 ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] ; GFX942-NEXT: s_nop 1 ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3], v[0:1], a[0:7] @@ -1769,26 +1749,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) % ; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-VGPR-NEXT: 
v_mov_b32_e32 v1, 0x405ec000 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, s2 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, s3 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], s[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, s2 +; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, s3 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], s[6:7] ; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13], v[10:11], v[2:9] +; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11], v[8:9], v[0:7] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-VGPR-NEXT: s_nop 15 -; GFX942-VGPR-NEXT: s_nop 1 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: s_nop 0 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 7e30af96bb8b9..3cd009126666c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -5489,38 +5489,38 @@ define amdgpu_kernel void 
@test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) # ; GFX942-LABEL: test_mfma_f32_32x32x1f32_imm: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: v_mov_b32_e32 v1, 1.0 -; GFX942-NEXT: v_accvgpr_write_b32 a1, 0 +; GFX942-NEXT: v_accvgpr_write_b32 a2, 0 +; GFX942-NEXT: v_accvgpr_write_b32 a3, 0 ; GFX942-NEXT: v_accvgpr_write_b32 a0, 1.0 -; GFX942-NEXT: v_accvgpr_mov_b32 a2, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a4, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a5, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a6, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a7, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a8, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a9, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a10, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a11, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a12, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a13, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a14, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a15, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a16, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a17, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a18, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a19, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a20, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a21, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a22, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a23, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a24, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a25, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a26, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a27, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a28, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a29, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a30, a1 -; GFX942-NEXT: v_accvgpr_mov_b32 a31, a1 +; GFX942-NEXT: v_accvgpr_mov_b32 a5, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a7, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a9, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a11, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a13, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a15, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a17, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a19, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a21, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a23, 
a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a25, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a27, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a29, a3 +; GFX942-NEXT: v_accvgpr_mov_b32 a31, a3 +; GFX942-NEXT: v_accvgpr_write_b32 a1, 0 +; GFX942-NEXT: v_accvgpr_mov_b32 a4, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a6, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a8, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a10, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a12, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a14, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a16, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a18, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a20, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a22, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a24, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a26, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a28, a2 +; GFX942-NEXT: v_accvgpr_mov_b32 a30, a2 ; GFX942-NEXT: v_mov_b32_e32 v2, 2.0 ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX942-NEXT: v_mov_b32_e32 v0, 0 @@ -5540,69 +5540,38 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) # ; ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x1f32_imm: ; GFX942-VGPR: ; %bb.0: ; %bb -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v9, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v10, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v11, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v12, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v13, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v14, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v15, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v20, 
v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v21, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v22, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v23, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v24, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v25, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v26, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v27, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v28, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v29, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v30, v1 -; GFX942-VGPR-NEXT: v_mov_b32_e32 v31, v1 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[32:33], v[30:31] -; GFX942-VGPR-NEXT: v_mov_b32_e32 v34, 2.0 -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[30:31], v[28:29] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[28:29], v[26:27] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[26:27], v[24:25] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[24:25], v[22:23] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[22:23], v[20:21] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[20:21], v[18:19] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], v[16:17] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], v[14:15] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], v[12:13] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], v[10:11] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], v[8:9] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 1.0 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], 0 +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], 0x3f800000 ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3] -; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[14:15], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[16:17], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[18:19], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[20:21], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[22:23], v[2:3] +; GFX942-VGPR-NEXT: 
v_mov_b64_e32 v[24:25], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[26:27], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[28:29], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b64_e32 v[30:31], v[2:3] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v34, 2.0 ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX942-VGPR-NEXT: s_nop 0 -; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[2:33], v0, v34, v[2:33] +; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 0 +; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v33, v34, v[0:31] ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-VGPR-NEXT: s_nop 15 ; GFX942-VGPR-NEXT: s_nop 0 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[30:33], s[0:1] offset:112 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[26:29], s[0:1] offset:96 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[22:25], s[0:1] offset:80 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[18:21], s[0:1] offset:64 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[14:17], s[0:1] offset:48 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[10:13], s[0:1] offset:32 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[6:9], s[0:1] offset:16 -; GFX942-VGPR-NEXT: global_store_dwordx4 v1, v[2:5], s[0:1] +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:64 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:48 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:32 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] offset:16 +; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] ; GFX942-VGPR-NEXT: s_endpgm bb: %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index f971080e02c5b..090707eda3ca5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -561,17 +561,17 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) { ; GFX950-LABEL: v_maximum_v2f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc +; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[6:7] +; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000 ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_max_f64 v[6:7], v[0:1], v[4:5] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f64: @@ -630,12 +630,19 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v2f64__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v2f64__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX900-NEXT: v_max_f64 
v[2:3], v[2:3], v[6:7] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v2f64__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f64__nnan: ; GFX10: ; %bb.0: @@ -711,17 +718,17 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1 ; GFX950-LABEL: v_maximum_v2f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7] -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc +; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[6:7] +; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000 ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_max_f64 v[6:7], v[0:1], v[4:5] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f64__nsz: @@ -780,12 +787,19 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double> ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v2f64__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX9-NEXT: s_setpc_b64 s[30:31] 
+; GFX900-LABEL: v_maximum_v2f64__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v2f64__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v2f64__nnan_nsz: ; GFX10: ; %bb.0: @@ -1008,22 +1022,22 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) { ; GFX950-LABEL: v_maximum_v3f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] +; GFX950-NEXT: v_max_f64 v[12:13], v[4:5], v[10:11] +; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_max_f64 v[10:11], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[6:7] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc -; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; 
GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f64: @@ -1092,13 +1106,21 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v3f64__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v3f64__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v3f64__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f64__nnan: ; GFX10: ; %bb.0: @@ -1189,22 +1211,22 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1 ; GFX950-LABEL: v_maximum_v3f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9] +; GFX950-NEXT: v_max_f64 v[12:13], v[4:5], v[10:11] +; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_max_f64 v[10:11], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 
0, vcc -; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[6:7] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc -; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f64__nsz: @@ -1273,13 +1295,21 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double> ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v3f64__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v3f64__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v3f64__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX950-NEXT: v_max_f64 
v[2:3], v[2:3], v[8:9] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v3f64__nnan_nsz: ; GFX10: ; %bb.0: @@ -1382,27 +1412,27 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) { ; GFX950-LABEL: v_maximum_v4f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] +; GFX950-NEXT: v_max_f64 v[16:17], v[6:7], v[14:15] +; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_max_f64 v[14:15], v[4:5], v[12:13] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc -; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13] +; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_max_f64 v[12:13], v[2:3], v[10:11] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_max_f64 v[10:11], v[0:1], v[8:9] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc -; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] -; 
GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f64: @@ -1482,14 +1512,23 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v4f64__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v4f64__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v4f64__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f64__nnan: ; GFX10: ; %bb.0: @@ -1595,27 +1634,27 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1 ; GFX950-LABEL: v_maximum_v4f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11] +; GFX950-NEXT: v_max_f64 v[16:17], 
v[6:7], v[14:15] +; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_max_f64 v[14:15], v[4:5], v[12:13] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc -; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13] +; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_max_f64 v[12:13], v[2:3], v[10:11] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_max_f64 v[10:11], v[0:1], v[8:9] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc -; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f64__nsz: @@ -1695,14 +1734,23 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double> ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximum_v4f64__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximum_v4f64__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximum_v4f64__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v4f64__nnan_nsz: ; GFX10: ; %bb.0: @@ -1864,43 +1912,43 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000 -; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] -; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19] -; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21] -; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19] +; GFX950-NEXT: v_max_f64 v[32:33], v[12:13], v[28:29] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29] +; GFX950-NEXT: v_max_f64 v[34:35], v[10:11], v[26:27] +; GFX950-NEXT: v_max_f64 v[36:37], v[8:9], v[24:25] +; GFX950-NEXT: v_cndmask_b32_e32 v13, v33, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v12, v32, 0, vcc +; GFX950-NEXT: 
v_cmp_u_f64_e32 vcc, v[10:11], v[26:27] ; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23] -; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25] -; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21] -; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27] -; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29] -; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23] -; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31] -; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc +; GFX950-NEXT: v_max_f64 v[48:49], v[4:5], v[20:21] +; GFX950-NEXT: v_cndmask_b32_e32 v11, v35, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v10, v34, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25] +; GFX950-NEXT: v_max_f64 v[50:51], v[2:3], v[18:19] +; GFX950-NEXT: v_max_f64 v[52:53], v[0:1], v[16:17] +; GFX950-NEXT: v_cndmask_b32_e32 v9, v37, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v8, v36, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27] +; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29] +; GFX950-NEXT: v_cndmask_b32_e32 v5, v49, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v48, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc -; GFX950-NEXT: 
v_cndmask_b32_e32 v13, v53, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v51, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v50, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v53, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v52, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v8f64: @@ -2371,152 +2419,144 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse -; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8 -; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4 -; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16 -; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12 -; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24 -; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20 +; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:120 +; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:116 ; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32 ; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28 -; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40 -; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36 -; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48 -; GFX950-NEXT: 
scratch_load_dword v44, off, s32 offset:44 -; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56 -; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52 -; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64 -; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60 -; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72 -; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68 -; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80 -; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76 -; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88 -; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84 -; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96 -; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92 +; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:24 +; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:20 +; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:16 +; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:12 +; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:8 +; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:4 +; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:112 ; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104 -; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100 +; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:128 +; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:124 +; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:108 +; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:104 +; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:100 +; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:96 +; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:92 +; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:88 +; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:84 +; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:80 +; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:76 +; 
GFX950-NEXT: scratch_load_dword v33, off, s32 offset:72 +; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:68 +; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] -; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112 -; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108 -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_max_f64 v[60:61], v[2:3], v[36:37] -; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37] -; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120 -; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:116 -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_max_f64 v[62:63], v[4:5], v[38:39] -; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39] -; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128 -; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000 -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_max_f64 v[0:1], v[6:7], v[56:57] -; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57] +; GFX950-NEXT: v_mov_b32_e32 v60, 0x7ff80000 ; GFX950-NEXT: s_waitcnt vmcnt(23) -; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47] -; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47] -; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5] -; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5] -; GFX950-NEXT: 
v_cndmask_b32_e64 v8, v56, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(21) -; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[44:45] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45] -; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1] -; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1] -; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc +; GFX950-NEXT: v_max_f64 v[46:47], v[28:29], v[40:41] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[40:41] +; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64 +; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_max_f64 v[58:59], v[6:7], v[56:57] +; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[6:7], v[56:57] +; GFX950-NEXT: scratch_load_dword v7, off, s32 offset:56 +; GFX950-NEXT: scratch_load_dword v6, off, s32 offset:52 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_max_f64 v[56:57], v[4:5], v[44:45] +; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[44:45] +; GFX950-NEXT: scratch_load_dword v5, off, s32 offset:48 +; GFX950-NEXT: scratch_load_dword v4, off, s32 offset:44 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_max_f64 v[44:45], v[2:3], v[42:43] +; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[42:43] +; GFX950-NEXT: scratch_load_dword v3, off, s32 offset:40 +; GFX950-NEXT: scratch_load_dword v2, off, s32 offset:36 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_max_f64 v[42:43], v[0:1], v[52:53] +; GFX950-NEXT: v_cmp_u_f64_e64 s[6:7], v[0:1], v[52:53] ; GFX950-NEXT: s_waitcnt vmcnt(19) -; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[42:43] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43] -; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3] -; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3] -; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(17) -; 
GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41] -; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(15) -; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[54:55] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55] -; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(13) -; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[52:53] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53] -; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[54:55] +; GFX950-NEXT: s_waitcnt vmcnt(18) +; GFX950-NEXT: v_max_f64 v[52:53], v[26:27], v[48:49] +; GFX950-NEXT: v_cmp_u_f64_e64 s[8:9], v[30:31], v[54:55] +; GFX950-NEXT: v_cndmask_b32_e32 v29, v47, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v28, v46, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[48:49] +; GFX950-NEXT: v_cndmask_b32_e64 v31, v1, v60, s[8:9] +; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, s[8:9] +; GFX950-NEXT: v_cndmask_b32_e32 v27, v53, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v26, v52, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(16) +; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[50:51] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[50:51] ; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(11) -; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[50:51] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51] ; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse -; GFX950-NEXT: 
v_accvgpr_read_b32 v45, a5 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(9) -; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[48:49] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49] -; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(14) +; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[38:39] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[38:39] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v60, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc +; GFX950-NEXT: s_waitcnt vmcnt(12) +; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[36:37] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[36:37] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(10) +; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[34:35] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[34:35] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(8) +; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[32:33] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[32:33] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(6) -; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[34:35] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35] -; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse +; GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41] ; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload 
Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc ; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(4) -; GFX950-NEXT: v_max_f64 v[0:1], v[26:27], v[32:33] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc +; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[6:7] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[6:7] +; GFX950-NEXT: v_cndmask_b32_e64 v7, v59, v60, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e64 v6, v58, 0, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(2) -; GFX950-NEXT: v_max_f64 v[0:1], v[28:29], v[36:37] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc +; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[4:5] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[4:5] +; GFX950-NEXT: v_cndmask_b32_e64 v5, v57, v60, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e64 v4, v56, 0, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[38:39] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc -; GFX950-NEXT: v_mov_b32_e32 v0, v58 -; GFX950-NEXT: v_mov_b32_e32 v1, v59 -; GFX950-NEXT: v_mov_b32_e32 v2, v60 +; GFX950-NEXT: v_max_f64 v[0:1], v[8:9], v[2:3] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[2:3] +; GFX950-NEXT: v_cndmask_b32_e64 v3, v45, v60, s[4:5] +; GFX950-NEXT: 
v_cndmask_b32_e64 v2, v44, 0, s[4:5] +; GFX950-NEXT: v_cndmask_b32_e32 v9, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v1, v43, v60, s[6:7] +; GFX950-NEXT: v_cndmask_b32_e64 v0, v42, 0, s[6:7] ; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index dfd67873c3b86..b119dd425463b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -561,17 +561,17 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) { ; GFX950-LABEL: v_minimum_v2f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc +; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[6:7] +; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000 ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_min_f64 v[6:7], v[0:1], v[4:5] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: 
s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f64: @@ -630,12 +630,19 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v2f64__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v2f64__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v2f64__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f64__nnan: ; GFX10: ; %bb.0: @@ -711,17 +718,17 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1 ; GFX950-LABEL: v_minimum_v2f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] -; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7] -; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc +; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[6:7] +; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000 ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7] +; GFX950-NEXT: v_min_f64 v[6:7], v[0:1], v[4:5] +; 
GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f64__nsz: @@ -780,12 +787,19 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double> ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v2f64__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v2f64__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v2f64__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v2f64__nnan_nsz: ; GFX10: ; %bb.0: @@ -1008,22 +1022,22 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) { ; GFX950-LABEL: v_minimum_v3f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] +; GFX950-NEXT: v_min_f64 v[12:13], v[4:5], v[10:11] +; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; 
GFX950-NEXT: v_min_f64 v[10:11], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[6:7] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc -; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f64: @@ -1092,13 +1106,21 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src ; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v3f64__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v3f64__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v3f64__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f64__nnan: ; GFX10: ; %bb.0: @@ -1189,22 +1211,22 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1 ; GFX950-LABEL: v_minimum_v3f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] -; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9] +; GFX950-NEXT: v_min_f64 v[12:13], v[4:5], v[10:11] +; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] +; GFX950-NEXT: v_min_f64 v[10:11], v[2:3], v[8:9] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9] +; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[6:7] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc -; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f64__nsz: @@ -1273,13 +1295,21 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> 
%src0, <3 x double> ; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v3f64__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v3f64__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v3f64__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v3f64__nnan_nsz: ; GFX10: ; %bb.0: @@ -1382,27 +1412,27 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) { ; GFX950-LABEL: v_minimum_v4f64: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] +; GFX950-NEXT: v_min_f64 v[16:17], v[6:7], v[14:15] +; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_min_f64 v[14:15], v[4:5], v[12:13] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc -; 
GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13] +; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_min_f64 v[12:13], v[2:3], v[10:11] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_min_f64 v[10:11], v[0:1], v[8:9] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc -; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f64: @@ -1482,14 +1512,23 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src ; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v4f64__nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v4f64__nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX900-NEXT: 
v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v4f64__nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f64__nnan: ; GFX10: ; %bb.0: @@ -1595,27 +1634,27 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1 ; GFX950-LABEL: v_minimum_v4f64__nsz: ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] -; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11] +; GFX950-NEXT: v_min_f64 v[16:17], v[6:7], v[14:15] +; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000 +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] +; GFX950-NEXT: v_min_f64 v[14:15], v[4:5], v[12:13] ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc -; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000 -; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc -; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13] +; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13] +; GFX950-NEXT: v_min_f64 v[12:13], v[2:3], v[10:11] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11] +; GFX950-NEXT: v_min_f64 v[10:11], v[0:1], v[8:9] +; GFX950-NEXT: s_nop 0 +; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc +; GFX950-NEXT: 
v_cndmask_b32_e64 v2, v12, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc -; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f64__nsz: @@ -1695,14 +1734,23 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double> ; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimum_v4f64__nnan_nsz: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimum_v4f64__nnan_nsz: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimum_v4f64__nnan_nsz: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v4f64__nnan_nsz: ; GFX10: ; %bb.0: @@ -1864,43 +1912,43 @@ define <8 x double> @v_minimum_v8f64(<8 x 
double> %src0, <8 x double> %src1) { ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: scratch_load_dword v31, off, s32 ; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000 -; GFX950-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] -; GFX950-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19] -; GFX950-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21] -; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19] +; GFX950-NEXT: v_min_f64 v[32:33], v[12:13], v[28:29] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29] +; GFX950-NEXT: v_min_f64 v[34:35], v[10:11], v[26:27] +; GFX950-NEXT: v_min_f64 v[36:37], v[8:9], v[24:25] +; GFX950-NEXT: v_cndmask_b32_e32 v13, v33, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v12, v32, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27] ; GFX950-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23] -; GFX950-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25] -; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21] -; GFX950-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27] -; GFX950-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29] -; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23] -; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31] -; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc +; GFX950-NEXT: v_min_f64 v[48:49], v[4:5], v[20:21] +; GFX950-NEXT: v_cndmask_b32_e32 v11, v35, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v10, v34, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25] +; GFX950-NEXT: v_min_f64 v[50:51], v[2:3], v[18:19] +; GFX950-NEXT: v_min_f64 v[52:53], v[0:1], v[16:17] +; GFX950-NEXT: 
v_cndmask_b32_e32 v9, v37, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v8, v36, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27] +; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29] +; GFX950-NEXT: v_cndmask_b32_e32 v5, v49, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v4, v48, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e32 v3, v51, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v2, v50, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17] +; GFX950-NEXT: s_waitcnt vmcnt(0) +; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31] +; GFX950-NEXT: v_cndmask_b32_e32 v1, v53, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v0, v52, 0, vcc ; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v8f64: @@ -2371,152 +2419,144 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; 
GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse -; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8 -; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4 -; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16 -; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12 -; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24 -; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20 +; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:120 +; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:116 ; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32 ; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28 -; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40 -; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36 -; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48 -; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44 -; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56 -; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52 -; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64 -; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60 -; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72 -; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68 -; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80 -; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76 -; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88 -; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84 -; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96 -; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92 +; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:24 +; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:20 +; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:16 +; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:12 +; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:8 +; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:4 
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:112 ; GFX950-NEXT: scratch_load_dword v31, off, s32 -; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104 -; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100 +; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:128 +; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:124 +; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:108 +; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:104 +; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:100 +; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:96 +; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:92 +; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:88 +; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:84 +; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:80 +; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:76 +; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:72 +; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:68 +; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_min_f64 v[58:59], v[0:1], v[32:33] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33] -; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112 -; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108 -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_min_f64 v[60:61], v[2:3], v[36:37] -; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37] -; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120 -; GFX950-NEXT: scratch_load_dword v36, off, s32 
offset:116 -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_min_f64 v[62:63], v[4:5], v[38:39] -; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39] -; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128 -; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124 -; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000 -; GFX950-NEXT: s_waitcnt vmcnt(25) -; GFX950-NEXT: v_min_f64 v[0:1], v[6:7], v[56:57] -; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57] +; GFX950-NEXT: v_mov_b32_e32 v60, 0x7ff80000 ; GFX950-NEXT: s_waitcnt vmcnt(23) -; GFX950-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47] -; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47] -; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5] -; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5] -; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(21) -; GFX950-NEXT: v_min_f64 v[0:1], v[10:11], v[44:45] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45] -; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1] -; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1] -; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc +; GFX950-NEXT: v_min_f64 v[46:47], v[28:29], v[40:41] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[40:41] +; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64 +; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_min_f64 v[58:59], v[6:7], v[56:57] +; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[6:7], v[56:57] +; GFX950-NEXT: scratch_load_dword v7, off, s32 offset:56 +; GFX950-NEXT: scratch_load_dword v6, off, s32 offset:52 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_min_f64 v[56:57], v[4:5], v[44:45] +; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[44:45] +; GFX950-NEXT: scratch_load_dword v5, off, 
s32 offset:48 +; GFX950-NEXT: scratch_load_dword v4, off, s32 offset:44 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_min_f64 v[44:45], v[2:3], v[42:43] +; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[42:43] +; GFX950-NEXT: scratch_load_dword v3, off, s32 offset:40 +; GFX950-NEXT: scratch_load_dword v2, off, s32 offset:36 +; GFX950-NEXT: s_waitcnt vmcnt(23) +; GFX950-NEXT: v_min_f64 v[42:43], v[0:1], v[52:53] +; GFX950-NEXT: v_cmp_u_f64_e64 s[6:7], v[0:1], v[52:53] ; GFX950-NEXT: s_waitcnt vmcnt(19) -; GFX950-NEXT: v_min_f64 v[0:1], v[12:13], v[42:43] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43] -; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3] -; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3] -; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(17) -; GFX950-NEXT: v_min_f64 v[0:1], v[14:15], v[40:41] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41] -; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(15) -; GFX950-NEXT: v_min_f64 v[0:1], v[16:17], v[54:55] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55] -; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(13) -; GFX950-NEXT: v_min_f64 v[0:1], v[18:19], v[52:53] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53] -; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX950-NEXT: v_min_f64 v[0:1], v[30:31], v[54:55] +; GFX950-NEXT: s_waitcnt vmcnt(18) +; GFX950-NEXT: v_min_f64 v[52:53], v[26:27], v[48:49] +; GFX950-NEXT: v_cmp_u_f64_e64 s[8:9], v[30:31], v[54:55] +; GFX950-NEXT: v_cndmask_b32_e32 
v29, v47, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v28, v46, 0, vcc +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[48:49] +; GFX950-NEXT: v_cndmask_b32_e64 v31, v1, v60, s[8:9] +; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, s[8:9] +; GFX950-NEXT: v_cndmask_b32_e32 v27, v53, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v26, v52, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(16) +; GFX950-NEXT: v_min_f64 v[0:1], v[24:25], v[50:51] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[50:51] ; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(11) -; GFX950-NEXT: v_min_f64 v[0:1], v[20:21], v[50:51] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51] ; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc -; GFX950-NEXT: s_waitcnt vmcnt(9) -; GFX950-NEXT: v_min_f64 v[0:1], v[22:23], v[48:49] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49] -; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse -; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(14) +; GFX950-NEXT: v_min_f64 v[0:1], v[22:23], v[38:39] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[38:39] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v60, vcc ; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc +; GFX950-NEXT: s_waitcnt vmcnt(12) +; GFX950-NEXT: v_min_f64 v[0:1], v[20:21], v[36:37] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[36:37] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(10) +; 
GFX950-NEXT: v_min_f64 v[0:1], v[18:19], v[34:35] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[34:35] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc +; GFX950-NEXT: s_waitcnt vmcnt(8) +; GFX950-NEXT: v_min_f64 v[0:1], v[16:17], v[32:33] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[32:33] +; GFX950-NEXT: s_nop 1 +; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(6) -; GFX950-NEXT: v_min_f64 v[0:1], v[24:25], v[34:35] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35] -; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse +; GFX950-NEXT: v_min_f64 v[0:1], v[14:15], v[40:41] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41] ; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse -; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc ; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(4) -; GFX950-NEXT: v_min_f64 v[0:1], v[26:27], v[32:33] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc +; GFX950-NEXT: v_min_f64 v[0:1], v[12:13], v[6:7] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[6:7] +; GFX950-NEXT: v_cndmask_b32_e64 v7, v59, v60, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e64 v6, v58, 0, s[0:1] +; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(2) -; GFX950-NEXT: v_min_f64 v[0:1], v[28:29], v[36:37] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc +; GFX950-NEXT: 
v_min_f64 v[0:1], v[10:11], v[4:5] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[4:5] +; GFX950-NEXT: v_cndmask_b32_e64 v5, v57, v60, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e64 v4, v56, 0, s[2:3] +; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc ; GFX950-NEXT: s_waitcnt vmcnt(0) -; GFX950-NEXT: v_min_f64 v[0:1], v[30:31], v[38:39] -; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39] -; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc -; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc -; GFX950-NEXT: v_mov_b32_e32 v0, v58 -; GFX950-NEXT: v_mov_b32_e32 v1, v59 -; GFX950-NEXT: v_mov_b32_e32 v2, v60 +; GFX950-NEXT: v_min_f64 v[0:1], v[8:9], v[2:3] +; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[2:3] +; GFX950-NEXT: v_cndmask_b32_e64 v3, v45, v60, s[4:5] +; GFX950-NEXT: v_cndmask_b32_e64 v2, v44, 0, s[4:5] +; GFX950-NEXT: v_cndmask_b32_e32 v9, v1, v60, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc +; GFX950-NEXT: v_cndmask_b32_e64 v1, v43, v60, s[6:7] +; GFX950-NEXT: v_cndmask_b32_e64 v0, v42, 0, s[6:7] ; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse ; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse ; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll index 5b2213592f495..d947f543a1a0a 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll @@ -5312,18 +5312,15 @@ define amdgpu_kernel void 
@constant_zextload_v2i1_to_v2i64(ptr addrspace(1) %out ; GFX1250-LABEL: constant_zextload_v2i1_to_v2i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: v_mov_b32_e32 v3, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: global_load_u8 v0, v1, s[2:3] +; GFX1250-NEXT: global_load_u8 v0, v3, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v0 -; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 1, v2 -; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] +; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_dual_lshrrev_b32 v2, 1, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40 +; GFX1250-NEXT: v_mov_b32_e32 v1, v3 +; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <2 x i1>, ptr addrspace(4) %in %ext = zext <2 x i1> %load to <2 x i64> @@ -5531,22 +5528,19 @@ define amdgpu_kernel void @constant_zextload_v3i1_to_v3i64(ptr addrspace(1) %out ; GFX1250-LABEL: constant_zextload_v3i1_to_v3i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v5, 0 +; GFX1250-NEXT: v_mov_b32_e32 v3, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: global_load_u8 v0, v5, s[2:3] +; GFX1250-NEXT: global_load_u8 v2, v3, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; GFX1250-NEXT: v_bfe_u32 v2, v0, 1, 1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_dual_lshrrev_b32 v4, 2, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40 -; GFX1250-NEXT: 
v_dual_mov_b32 v1, v5 :: v_dual_mov_b32 v3, v5 -; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_lshrrev_b32 v4, 2, v0 +; GFX1250-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_bitop2_b32 v0, 1, v2 bitop3:0x40 +; GFX1250-NEXT: v_bfe_u32 v2, v2, 1, 1 ; GFX1250-NEXT: v_and_b32_e32 v4, 0xffff, v4 ; GFX1250-NEXT: s_clause 0x1 -; GFX1250-NEXT: global_store_b64 v5, v[4:5], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v5, v[0:3], s[0:1] +; GFX1250-NEXT: global_store_b64 v3, v[4:5], s[0:1] offset:16 +; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <3 x i1>, ptr addrspace(4) %in %ext = zext <3 x i1> %load to <3 x i64> @@ -5800,27 +5794,20 @@ define amdgpu_kernel void @constant_zextload_v4i1_to_v4i64(ptr addrspace(1) %out ; GFX1250-LABEL: constant_zextload_v4i1_to_v4i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: v_mov_b32_e32 v3, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: global_load_u8 v0, v1, s[2:3] +; GFX1250-NEXT: global_load_u8 v6, v3, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: v_readfirstlane_b32 s2, v0 -; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10002 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 3, v0 -; GFX1250-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, s3 -; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10001 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GFX1250-NEXT: s_and_b32 s2, s2, 1 -; 
GFX1250-NEXT: s_and_b32 s3, 0xffff, s3 -; GFX1250-NEXT: s_and_b32 s2, 0xffff, s2 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] +; GFX1250-NEXT: v_and_b32_e32 v2, 0xffff, v6 +; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_bitop2_b32 v4, 1, v6 bitop3:0x40 +; GFX1250-NEXT: v_dual_mov_b32 v7, v3 :: v_dual_mov_b32 v5, v3 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 3, v2 +; GFX1250-NEXT: v_bfe_u32 v0, v6, 2, 1 +; GFX1250-NEXT: v_bfe_u32 v6, v6, 1, 1 +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:16 +; GFX1250-NEXT: global_store_b128 v3, v[4:7], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <4 x i1>, ptr addrspace(4) %in %ext = zext <4 x i1> %load to <4 x i64> @@ -6136,28 +6123,28 @@ define amdgpu_kernel void @constant_zextload_v8i1_to_v8i64(ptr addrspace(1) %out ; GFX1250-LABEL: constant_zextload_v8i1_to_v8i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: v_mov_b32_e32 v3, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: global_load_u8 v12, v1, s[2:3] +; GFX1250-NEXT: global_load_u8 v12, v3, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v12 ; GFX1250-NEXT: v_bfe_u32 v6, v12, 5, 1 ; GFX1250-NEXT: v_bfe_u32 v4, v12, 4, 1 ; GFX1250-NEXT: v_bfe_u32 v10, v12, 3, 1 ; GFX1250-NEXT: v_bfe_u32 v8, v12, 2, 1 -; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_lshrrev_b32 v2, 7, v0 -; GFX1250-NEXT: v_mov_b32_e32 v5, v1 +; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_lshrrev_b32 v2, 7, v0 +; GFX1250-NEXT: v_mov_b32_e32 v7, v3 ; GFX1250-NEXT: v_bfe_u32 v0, v0, 6, 1 -; GFX1250-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_mov_b32 v9, v1 -; GFX1250-NEXT: v_dual_mov_b32 v11, v1 :: v_dual_mov_b32 v13, v1 -; GFX1250-NEXT: 
v_mov_b32_e32 v15, v1 +; GFX1250-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v11, v3 +; GFX1250-NEXT: v_dual_mov_b32 v9, v3 :: v_dual_mov_b32 v15, v3 +; GFX1250-NEXT: v_mov_b32_e32 v13, v3 ; GFX1250-NEXT: v_bfe_u32 v14, v12, 1, 1 ; GFX1250-NEXT: v_and_b32_e32 v12, 1, v12 ; GFX1250-NEXT: s_clause 0x3 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:48 -; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:32 -; GFX1250-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v1, v[12:15], s[0:1] +; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:48 +; GFX1250-NEXT: global_store_b128 v3, v[4:7], s[0:1] offset:32 +; GFX1250-NEXT: global_store_b128 v3, v[8:11], s[0:1] offset:16 +; GFX1250-NEXT: global_store_b128 v3, v[12:15], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <8 x i1>, ptr addrspace(4) %in %ext = zext <8 x i1> %load to <8 x i64> @@ -6374,35 +6361,35 @@ define amdgpu_kernel void @constant_sextload_v8i1_to_v8i64(ptr addrspace(1) %out ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: v_readfirstlane_b32 s3, v0 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-NEXT: v_mov_b32_e32 v10, s3 -; GFX1250-NEXT: s_lshr_b32 s2, s3, 6 -; GFX1250-NEXT: s_lshr_b32 s4, s3, 7 -; GFX1250-NEXT: s_lshr_b32 s6, s3, 4 -; GFX1250-NEXT: s_lshr_b32 s8, s3, 5 -; GFX1250-NEXT: s_lshr_b32 s10, s3, 2 -; GFX1250-NEXT: s_lshr_b32 s12, s3, 3 -; GFX1250-NEXT: s_lshr_b32 s14, s3, 1 -; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 -; GFX1250-NEXT: v_bfe_i32 v12, v10, 0, 1 -; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 +; GFX1250-NEXT: v_mov_b32_e32 v4, s3 +; GFX1250-NEXT: s_lshr_b32 s12, s3, 6 +; GFX1250-NEXT: s_lshr_b32 s14, s3, 7 +; GFX1250-NEXT: s_lshr_b32 s8, s3, 4 +; GFX1250-NEXT: s_lshr_b32 s10, s3, 5 +; GFX1250-NEXT: s_lshr_b32 s4, s3, 2 +; GFX1250-NEXT: s_lshr_b32 s6, s3, 3 +; GFX1250-NEXT: s_bfe_i64 
s[14:15], s[14:15], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s2, s3, 1 ; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 -; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7 -; GFX1250-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9 -; GFX1250-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11 -; GFX1250-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13 -; GFX1250-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_ashrrev_i32 v13, 31, v12 -; GFX1250-NEXT: v_mov_b32_e32 v15, s15 +; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 +; GFX1250-NEXT: v_bfe_i32 v4, v4, 0, 1 +; GFX1250-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GFX1250-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 +; GFX1250-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GFX1250-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[6:7] +; GFX1250-NEXT: v_mov_b64_e32 v[6:7], s[2:3] +; GFX1250-NEXT: v_ashrrev_i32_e32 v5, 31, v4 ; GFX1250-NEXT: s_clause 0x3 -; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 -; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 -; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1] +; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1] offset:48 +; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:32 +; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:16 +; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <8 x i1>, ptr addrspace(4) %in %ext = sext <8 x i1> %load to <8 x 
i64> @@ -6696,44 +6683,43 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o ; GFX1250-LABEL: constant_zextload_v16i1_to_v16i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 -; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: v_mov_b32_e32 v3, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: global_load_u16 v12, v1, s[2:3] +; GFX1250-NEXT: global_load_u16 v10, v3, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: v_and_b32_e32 v22, 0xffff, v12 -; GFX1250-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_bitop2_b32 v28, 1, v12 bitop3:0x40 -; GFX1250-NEXT: v_mov_b32_e32 v5, v1 -; GFX1250-NEXT: v_bfe_u32 v0, v12, 10, 1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) -; GFX1250-NEXT: v_bfe_u32 v2, v22, 11, 1 -; GFX1250-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_mov_b32 v9, v1 -; GFX1250-NEXT: v_bfe_u32 v6, v12, 9, 1 -; GFX1250-NEXT: v_bfe_u32 v4, v22, 8, 1 -; GFX1250-NEXT: v_dual_mov_b32 v11, v1 :: v_dual_mov_b32 v13, v1 -; GFX1250-NEXT: v_dual_mov_b32 v31, v1 :: v_dual_lshrrev_b32 v10, 15, v22 -; GFX1250-NEXT: v_bfe_u32 v8, v22, 14, 1 -; GFX1250-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v17, v1 -; GFX1250-NEXT: v_bfe_u32 v14, v12, 13, 1 -; GFX1250-NEXT: v_bfe_u32 v18, v12, 7, 1 -; GFX1250-NEXT: v_bfe_u32 v26, v12, 3, 1 -; GFX1250-NEXT: v_bfe_u32 v30, v12, 1, 1 -; GFX1250-NEXT: v_bfe_u32 v24, v12, 2, 1 -; GFX1250-NEXT: v_bfe_u32 v20, v12, 4, 1 -; GFX1250-NEXT: v_bfe_u32 v16, v12, 6, 1 -; GFX1250-NEXT: v_bfe_u32 v12, v12, 12, 1 -; GFX1250-NEXT: v_dual_mov_b32 v19, v1 :: v_dual_mov_b32 v21, v1 -; GFX1250-NEXT: v_dual_mov_b32 v23, v1 :: v_dual_mov_b32 v25, v1 -; GFX1250-NEXT: v_dual_mov_b32 v27, v1 :: v_dual_mov_b32 v29, v1 +; GFX1250-NEXT: v_and_b32_e32 v22, 0xffff, v10 +; GFX1250-NEXT: v_bfe_u32 v6, v10, 13, 1 +; GFX1250-NEXT: v_bfe_u32 v4, v10, 12, 1 +; GFX1250-NEXT: v_bfe_u32 v30, v10, 1, 1 +; GFX1250-NEXT: v_bfe_u32 v26, v10, 3, 1 +; GFX1250-NEXT: v_dual_mov_b32 v1, v3 :: v_dual_lshrrev_b32 v2, 15, v22 +; 
GFX1250-NEXT: v_mov_b32_e32 v7, v3 +; GFX1250-NEXT: v_bfe_u32 v0, v22, 14, 1 +; GFX1250-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v11, v3 +; GFX1250-NEXT: v_dual_mov_b32 v9, v3 :: v_dual_mov_b32 v15, v3 +; GFX1250-NEXT: v_dual_mov_b32 v29, v3 :: v_dual_bitop2_b32 v28, 1, v10 bitop3:0x40 +; GFX1250-NEXT: v_bfe_u32 v18, v10, 7, 1 +; GFX1250-NEXT: v_bfe_u32 v14, v10, 9, 1 +; GFX1250-NEXT: v_bfe_u32 v8, v10, 10, 1 +; GFX1250-NEXT: v_bfe_u32 v16, v10, 6, 1 +; GFX1250-NEXT: v_bfe_u32 v20, v10, 4, 1 +; GFX1250-NEXT: v_bfe_u32 v24, v10, 2, 1 +; GFX1250-NEXT: v_bfe_u32 v10, v22, 11, 1 +; GFX1250-NEXT: v_dual_mov_b32 v13, v3 :: v_dual_mov_b32 v19, v3 +; GFX1250-NEXT: v_bfe_u32 v12, v22, 8, 1 +; GFX1250-NEXT: v_dual_mov_b32 v17, v3 :: v_dual_mov_b32 v23, v3 +; GFX1250-NEXT: v_dual_mov_b32 v21, v3 :: v_dual_mov_b32 v27, v3 +; GFX1250-NEXT: v_dual_mov_b32 v25, v3 :: v_dual_mov_b32 v31, v3 ; GFX1250-NEXT: v_bfe_u32 v22, v22, 5, 1 ; GFX1250-NEXT: s_clause 0x7 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:80 -; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:64 -; GFX1250-NEXT: global_store_b128 v1, v[8:11], s[0:1] offset:112 -; GFX1250-NEXT: global_store_b128 v1, v[12:15], s[0:1] offset:96 -; GFX1250-NEXT: global_store_b128 v1, v[16:19], s[0:1] offset:48 -; GFX1250-NEXT: global_store_b128 v1, v[20:23], s[0:1] offset:32 -; GFX1250-NEXT: global_store_b128 v1, v[24:27], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v1, v[28:31], s[0:1] +; GFX1250-NEXT: global_store_b128 v3, v[0:3], s[0:1] offset:112 +; GFX1250-NEXT: global_store_b128 v3, v[4:7], s[0:1] offset:96 +; GFX1250-NEXT: global_store_b128 v3, v[8:11], s[0:1] offset:80 +; GFX1250-NEXT: global_store_b128 v3, v[12:15], s[0:1] offset:64 +; GFX1250-NEXT: global_store_b128 v3, v[16:19], s[0:1] offset:48 +; GFX1250-NEXT: global_store_b128 v3, v[20:23], s[0:1] offset:32 +; GFX1250-NEXT: global_store_b128 v3, v[24:27], s[0:1] offset:16 +; GFX1250-NEXT: global_store_b128 v3, v[28:31], s[0:1] ; 
GFX1250-NEXT: s_endpgm %load = load <16 x i1>, ptr addrspace(4) %in %ext = zext <16 x i1> %load to <16 x i64> @@ -7106,65 +7092,64 @@ define amdgpu_kernel void @constant_sextload_v16i1_to_v16i64(ptr addrspace(1) %o ; GFX1250-NEXT: global_load_u16 v0, v32, s[2:3] ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: v_readfirstlane_b32 s3, v0 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1250-NEXT: v_mov_b32_e32 v28, s3 -; GFX1250-NEXT: s_lshr_b32 s2, s3, 14 -; GFX1250-NEXT: s_lshr_b32 s4, s3, 15 -; GFX1250-NEXT: s_lshr_b32 s10, s3, 10 -; GFX1250-NEXT: s_lshr_b32 s12, s3, 11 -; GFX1250-NEXT: s_lshr_b32 s6, s3, 12 -; GFX1250-NEXT: s_lshr_b32 s8, s3, 13 -; GFX1250-NEXT: s_lshr_b32 s14, s3, 8 -; GFX1250-NEXT: s_lshr_b32 s16, s3, 9 -; GFX1250-NEXT: s_lshr_b32 s18, s3, 6 -; GFX1250-NEXT: s_lshr_b32 s20, s3, 7 -; GFX1250-NEXT: s_lshr_b32 s22, s3, 4 -; GFX1250-NEXT: s_lshr_b32 s24, s3, 5 -; GFX1250-NEXT: s_lshr_b32 s26, s3, 2 -; GFX1250-NEXT: s_lshr_b32 s28, s3, 3 -; GFX1250-NEXT: s_lshr_b32 s30, s3, 1 -; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s20, s3, 10 +; GFX1250-NEXT: s_lshr_b32 s22, s3, 11 +; GFX1250-NEXT: s_lshr_b32 s28, s3, 14 +; GFX1250-NEXT: s_lshr_b32 s30, s3, 15 +; GFX1250-NEXT: s_lshr_b32 s16, s3, 8 +; GFX1250-NEXT: s_lshr_b32 s18, s3, 9 +; GFX1250-NEXT: s_lshr_b32 s24, s3, 12 +; GFX1250-NEXT: s_lshr_b32 s26, s3, 13 +; GFX1250-NEXT: v_mov_b32_e32 v0, s3 +; GFX1250-NEXT: s_lshr_b32 s12, s3, 6 +; GFX1250-NEXT: s_lshr_b32 s14, s3, 7 +; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s8, s3, 4 +; GFX1250-NEXT: s_lshr_b32 s10, s3, 5 +; GFX1250-NEXT: s_bfe_i64 s[26:27], 
s[26:27], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s4, s3, 2 +; GFX1250-NEXT: s_lshr_b32 s6, s3, 3 ; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 +; GFX1250-NEXT: v_mov_b64_e32 v[20:21], s[20:21] +; GFX1250-NEXT: v_mov_b64_e32 v[28:29], s[28:29] +; GFX1250-NEXT: v_mov_b64_e32 v[30:31], s[30:31] +; GFX1250-NEXT: v_mov_b64_e32 v[22:23], s[22:23] +; GFX1250-NEXT: s_lshr_b32 s2, s3, 1 +; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 +; GFX1250-NEXT: v_mov_b64_e32 v[16:17], s[16:17] +; GFX1250-NEXT: v_mov_b64_e32 v[24:25], s[24:25] +; GFX1250-NEXT: v_mov_b64_e32 v[26:27], s[26:27] +; GFX1250-NEXT: v_mov_b64_e32 v[18:19], s[18:19] ; GFX1250-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 -; GFX1250-NEXT: v_dual_mov_b32 v8, s10 :: v_dual_mov_b32 v9, s11 -; GFX1250-NEXT: v_dual_mov_b32 v10, s12 :: v_dual_mov_b32 v11, s13 -; GFX1250-NEXT: v_bfe_i32 v28, v28, 0, 1 -; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7 -; GFX1250-NEXT: v_dual_mov_b32 v6, s8 :: v_dual_mov_b32 v7, s9 -; GFX1250-NEXT: v_dual_mov_b32 v12, s14 :: v_dual_mov_b32 v13, s15 -; GFX1250-NEXT: v_dual_mov_b32 v14, s16 :: v_dual_mov_b32 v15, s17 -; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v17, s19 -; GFX1250-NEXT: v_dual_mov_b32 v18, s20 :: v_dual_mov_b32 v19, s21 -; 
GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v20, s22 :: v_dual_mov_b32 v21, s23 -; GFX1250-NEXT: v_dual_mov_b32 v22, s24 :: v_dual_mov_b32 v23, s25 -; GFX1250-NEXT: v_dual_mov_b32 v24, s26 :: v_dual_mov_b32 v25, s27 -; GFX1250-NEXT: v_dual_mov_b32 v26, s28 :: v_dual_mov_b32 v27, s29 -; GFX1250-NEXT: v_dual_mov_b32 v30, s30 :: v_dual_mov_b32 v31, s31 +; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 +; GFX1250-NEXT: v_mov_b64_e32 v[12:13], s[12:13] +; GFX1250-NEXT: v_mov_b64_e32 v[14:15], s[14:15] +; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 1 +; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 +; GFX1250-NEXT: v_mov_b64_e32 v[8:9], s[8:9] +; GFX1250-NEXT: v_mov_b64_e32 v[10:11], s[10:11] +; GFX1250-NEXT: v_mov_b64_e32 v[4:5], s[4:5] +; GFX1250-NEXT: v_mov_b64_e32 v[6:7], s[6:7] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX1250-NEXT: s_clause 0x1 -; GFX1250-NEXT: global_store_b128 v32, v[0:3], s[0:1] offset:112 -; GFX1250-NEXT: global_store_b128 v32, v[4:7], s[0:1] offset:96 -; GFX1250-NEXT: v_ashrrev_i32_e32 v29, 31, v28 +; GFX1250-NEXT: global_store_b128 v32, v[28:31], s[0:1] offset:112 +; GFX1250-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:96 +; GFX1250-NEXT: v_ashrrev_i32_e32 v1, 31, v0 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:80 -; GFX1250-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:64 -; GFX1250-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:48 -; GFX1250-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:32 -; GFX1250-NEXT: global_store_b128 v32, v[24:27], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v32, v[28:31], s[0:1] +; GFX1250-NEXT: global_store_b128 v32, v[20:23], s[0:1] offset:80 +; GFX1250-NEXT: global_store_b128 v32, v[16:19], s[0:1] offset:64 +; GFX1250-NEXT: global_store_b128 v32, v[12:15], s[0:1] offset:48 +; GFX1250-NEXT: global_store_b128 v32, v[8:11], s[0:1] offset:32 +; GFX1250-NEXT: global_store_b128 v32, v[4:7], s[0:1] 
offset:16 +; GFX1250-NEXT: global_store_b128 v32, v[0:3], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <16 x i1>, ptr addrspace(4) %in %ext = sext <16 x i1> %load to <16 x i64> @@ -7727,11 +7712,11 @@ define amdgpu_kernel void @constant_zextload_v32i1_to_v32i64(ptr addrspace(1) %o ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s3 -; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10014 -; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10015 +; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10015 +; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10014 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v2, s4 +; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s3 ; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10013 ; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10012 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160 @@ -8499,87 +8484,87 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_lshr_b32 s34, s2, 30 -; GFX1250-NEXT: s_lshr_b32 s36, s2, 31 -; GFX1250-NEXT: s_lshr_b32 s38, s2, 28 -; GFX1250-NEXT: s_lshr_b32 s40, s2, 29 -; GFX1250-NEXT: s_lshr_b32 s42, s2, 26 -; GFX1250-NEXT: s_lshr_b32 s44, s2, 27 -; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s46, s2, 24 -; GFX1250-NEXT: s_lshr_b32 s48, s2, 25 -; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s34 -; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v1, s35 :: v_dual_mov_b32 v2, s36 -; GFX1250-NEXT: 
v_dual_mov_b32 v3, s37 :: v_dual_mov_b32 v4, s38 -; GFX1250-NEXT: s_lshr_b32 s26, s2, 22 +; GFX1250-NEXT: s_lshr_b32 s64, s2, 30 +; GFX1250-NEXT: s_lshr_b32 s66, s2, 31 +; GFX1250-NEXT: s_lshr_b32 s60, s2, 28 +; GFX1250-NEXT: s_lshr_b32 s62, s2, 29 +; GFX1250-NEXT: s_lshr_b32 s56, s2, 26 +; GFX1250-NEXT: s_lshr_b32 s58, s2, 27 +; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s52, s2, 24 +; GFX1250-NEXT: s_lshr_b32 s54, s2, 25 +; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s64 +; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v1, s65 :: v_dual_mov_b32 v2, s66 +; GFX1250-NEXT: v_dual_mov_b32 v3, s67 :: v_dual_mov_b32 v4, s60 +; GFX1250-NEXT: s_lshr_b32 s48, s2, 22 ; GFX1250-NEXT: s_lshr_b32 s50, s2, 23 -; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v5, s39 :: v_dual_mov_b32 v6, s40 -; GFX1250-NEXT: v_dual_mov_b32 v7, s41 :: v_dual_mov_b32 v8, s42 -; GFX1250-NEXT: s_lshr_b32 s52, s2, 20 -; GFX1250-NEXT: s_lshr_b32 s54, s2, 21 -; GFX1250-NEXT: v_dual_mov_b32 v9, s43 :: v_dual_mov_b32 v10, s44 -; GFX1250-NEXT: v_dual_mov_b32 v11, s45 :: v_dual_mov_b32 v12, s46 -; GFX1250-NEXT: s_lshr_b32 s56, s2, 18 -; GFX1250-NEXT: s_lshr_b32 s58, s2, 19 -; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v13, s47 :: v_dual_mov_b32 v14, s48 -; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 -; GFX1250-NEXT: v_mov_b32_e32 v15, s49 -; GFX1250-NEXT: s_lshr_b32 s60, s2, 16 -; GFX1250-NEXT: s_lshr_b32 s62, s2, 17 ; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s64, s2, 14 
-; GFX1250-NEXT: s_lshr_b32 s66, s2, 15 -; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v5, s61 :: v_dual_mov_b32 v6, s62 +; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v8, s56 +; GFX1250-NEXT: s_lshr_b32 s44, s2, 20 +; GFX1250-NEXT: s_lshr_b32 s46, s2, 21 +; GFX1250-NEXT: v_dual_mov_b32 v9, s57 :: v_dual_mov_b32 v10, s58 +; GFX1250-NEXT: v_dual_mov_b32 v11, s59 :: v_dual_mov_b32 v12, s52 +; GFX1250-NEXT: s_lshr_b32 s40, s2, 18 +; GFX1250-NEXT: s_lshr_b32 s42, s2, 19 +; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v13, s53 :: v_dual_mov_b32 v14, s54 +; GFX1250-NEXT: v_mov_b32_e32 v15, s55 +; GFX1250-NEXT: s_lshr_b32 s24, s2, 16 +; GFX1250-NEXT: s_lshr_b32 s38, s2, 17 +; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s36, s2, 14 +; GFX1250-NEXT: s_lshr_b32 s34, s2, 15 +; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 ; GFX1250-NEXT: s_clause 0x3 ; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:240 ; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:224 ; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[0:1] offset:208 ; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[0:1] offset:192 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v1, s27 +; GFX1250-NEXT: v_dual_mov_b32 v0, s48 :: v_dual_mov_b32 v1, s49 ; GFX1250-NEXT: v_dual_mov_b32 v2, s50 :: v_dual_mov_b32 v3, s51 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_mov_b32_e32 v4, s52 +; GFX1250-NEXT: v_mov_b32_e32 v4, s44 +; GFX1250-NEXT: s_lshr_b32 s26, s2, 10 +; GFX1250-NEXT: s_lshr_b32 s22, s2, 11 ; GFX1250-NEXT: s_lshr_b32 s30, s2, 12 ; GFX1250-NEXT: s_lshr_b32 s28, s2, 13 -; GFX1250-NEXT: s_lshr_b32 s24, 
s2, 10 -; GFX1250-NEXT: s_lshr_b32 s22, s2, 11 -; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v5, s53 :: v_dual_mov_b32 v6, s54 +; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v5, s45 :: v_dual_mov_b32 v6, s46 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v7, s55 :: v_dual_mov_b32 v8, s56 +; GFX1250-NEXT: v_dual_mov_b32 v7, s47 :: v_dual_mov_b32 v8, s40 ; GFX1250-NEXT: s_lshr_b32 s20, s2, 8 ; GFX1250-NEXT: s_lshr_b32 s18, s2, 9 -; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v9, s57 :: v_dual_mov_b32 v10, s58 +; GFX1250-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v9, s41 :: v_dual_mov_b32 v10, s42 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v11, s59 :: v_dual_mov_b32 v12, s60 +; GFX1250-NEXT: v_dual_mov_b32 v11, s43 :: v_dual_mov_b32 v12, s24 ; GFX1250-NEXT: s_lshr_b32 s16, s2, 6 ; GFX1250-NEXT: s_lshr_b32 s14, s2, 7 -; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v13, s61 :: v_dual_mov_b32 v14, s62 -; GFX1250-NEXT: v_dual_mov_b32 v15, s63 :: v_dual_mov_b32 v16, s64 +; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v13, s25 :: v_dual_mov_b32 v14, s38 +; GFX1250-NEXT: v_dual_mov_b32 v15, s39 :: v_dual_mov_b32 v16, s36 ; GFX1250-NEXT: s_lshr_b32 s12, s2, 4 ; GFX1250-NEXT: s_lshr_b32 s10, s2, 5 ; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 ; GFX1250-NEXT: s_bfe_i64 
s[20:21], s[20:21], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v17, s65 :: v_dual_mov_b32 v18, s66 -; GFX1250-NEXT: v_dual_mov_b32 v19, s67 :: v_dual_mov_b32 v20, s30 +; GFX1250-NEXT: v_dual_mov_b32 v17, s37 :: v_dual_mov_b32 v18, s34 +; GFX1250-NEXT: v_dual_mov_b32 v19, s35 :: v_dual_mov_b32 v20, s30 ; GFX1250-NEXT: s_lshr_b32 s8, s2, 2 ; GFX1250-NEXT: s_lshr_b32 s6, s2, 3 ; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 @@ -8594,11 +8579,11 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o ; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[0:1] offset:112 ; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[0:1] offset:96 ; GFX1250-NEXT: s_wait_xcnt 0x5 -; GFX1250-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 +; GFX1250-NEXT: v_dual_mov_b32 v0, s26 :: v_dual_mov_b32 v1, s27 ; GFX1250-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23 ; GFX1250-NEXT: s_wait_xcnt 0x4 ; GFX1250-NEXT: v_mov_b32_e32 v4, s20 -; GFX1250-NEXT: s_lshr_b32 s68, s2, 1 +; GFX1250-NEXT: s_lshr_b32 s4, s2, 1 ; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 ; GFX1250-NEXT: v_dual_mov_b32 v5, s21 :: v_dual_mov_b32 v6, s18 @@ -8609,16 +8594,16 @@ define amdgpu_kernel void @constant_sextload_v32i1_to_v32i64(ptr addrspace(1) %o ; GFX1250-NEXT: v_dual_mov_b32 v9, s17 :: v_dual_mov_b32 v10, s14 ; GFX1250-NEXT: s_wait_xcnt 0x2 ; GFX1250-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v12, s12 -; GFX1250-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[2:3], s[68:69], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 ; GFX1250-NEXT: v_dual_mov_b32 v13, s13 :: v_dual_mov_b32 v14, s10 ; GFX1250-NEXT: s_wait_xcnt 0x1 ; GFX1250-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v16, s8 ; GFX1250-NEXT: v_dual_mov_b32 v17, s9 :: v_dual_mov_b32 v18, s6 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v19, s7 :: 
v_dual_mov_b32 v20, s4 -; GFX1250-NEXT: v_dual_mov_b32 v21, s5 :: v_dual_mov_b32 v22, s2 -; GFX1250-NEXT: v_mov_b32_e32 v23, s3 +; GFX1250-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v20, s2 +; GFX1250-NEXT: v_dual_mov_b32 v21, s3 :: v_dual_mov_b32 v22, s4 +; GFX1250-NEXT: v_mov_b32_e32 v23, s5 ; GFX1250-NEXT: s_clause 0x5 ; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[0:1] offset:80 ; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[0:1] offset:64 @@ -9661,20 +9646,16 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10014 +; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001e ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4 -; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10015 -; GFX1250-NEXT: s_lshr_b32 s4, s3, 31 -; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1 -; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001e -; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10004 -; GFX1250-NEXT: s_and_b32 s7, s2, 1 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 +; GFX1250-NEXT: s_lshr_b32 s5, s3, 31 ; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x1001d +; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 v3, v1 ; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x1001c +; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10004 +; GFX1250-NEXT: s_bfe_u32 s7, s2, 0x10002 +; GFX1250-NEXT: s_and_b32 s8, s2, 1 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:496 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 @@ -9693,9 +9674,14 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:448 ; GFX1250-NEXT: 
s_wait_xcnt 0x0 ; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 +; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10015 +; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10014 +; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 ; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10013 ; GFX1250-NEXT: s_bfe_u32 s5, s3, 0x10012 -; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:432 +; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:416 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 ; GFX1250-NEXT: s_bfe_u32 s4, s3, 0x10011 @@ -9763,11 +9749,11 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:192 ; GFX1250-NEXT: s_wait_xcnt 0x0 ; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 -; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10014 -; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10015 +; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10015 +; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10014 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:176 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v2, s5 +; GFX1250-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v2, s4 ; GFX1250-NEXT: s_bfe_u32 s4, s2, 0x10013 ; GFX1250-NEXT: s_bfe_u32 s5, s2, 0x10012 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:160 @@ -9813,16 +9799,15 @@ define amdgpu_kernel void @constant_zextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v2, s3 ; GFX1250-NEXT: s_bfe_u32 s3, s2, 0x10003 ; GFX1250-NEXT: s_bfe_u32 s6, s2, 0x10001 -; GFX1250-NEXT: s_bfe_u32 s2, s2, 0x10002 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:32 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v2, s3 +; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 
v2, s3 ; GFX1250-NEXT: s_and_b64 s[2:3], s[4:5], 1 ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] offset:16 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v0, s7 :: v_dual_mov_b32 v2, s6 +; GFX1250-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v2, s6 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b128 v1, v[4:7], s[0:1] offset:256 ; GFX1250-NEXT: global_store_b128 v1, v[0:3], s[0:1] @@ -11207,266 +11192,284 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX1250-LABEL: constant_sextload_v64i1_to_v64i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b64 s[10:11], s[2:3], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_lshr_b32 s2, s11, 1 +; GFX1250-NEXT: s_lshr_b32 s40, s11, 18 +; GFX1250-NEXT: v_writelane_b32 v13, s2, 0 +; GFX1250-NEXT: s_lshr_b32 s44, s11, 19 +; GFX1250-NEXT: s_lshr_b32 s48, s11, 20 +; GFX1250-NEXT: s_lshr_b32 s58, s11, 22 +; GFX1250-NEXT: s_lshr_b32 s62, s11, 23 +; GFX1250-NEXT: v_writelane_b32 v13, s3, 1 +; GFX1250-NEXT: s_lshr_b32 s2, s11, 2 +; GFX1250-NEXT: s_lshr_b32 s70, s11, 25 ; GFX1250-NEXT: s_lshr_b32 s96, s11, 30 ; GFX1250-NEXT: s_lshr_b32 s98, s11, 31 -; GFX1250-NEXT: s_lshr_b32 s92, s11, 28 -; GFX1250-NEXT: s_lshr_b32 s94, s11, 29 -; GFX1250-NEXT: s_lshr_b32 s78, s11, 26 -; GFX1250-NEXT: s_lshr_b32 s88, s11, 27 +; GFX1250-NEXT: v_writelane_b32 v13, s2, 2 +; GFX1250-NEXT: s_lshr_b32 s54, s11, 21 +; GFX1250-NEXT: s_lshr_b32 s86, s11, 28 +; GFX1250-NEXT: s_lshr_b32 s92, s11, 29 +; GFX1250-NEXT: s_lshr_b32 s66, s11, 24 +; GFX1250-NEXT: v_writelane_b32 v13, s3, 3 +; GFX1250-NEXT: s_lshr_b32 s2, s11, 3 +; GFX1250-NEXT: s_lshr_b32 s74, s11, 26 +; GFX1250-NEXT: s_lshr_b32 s82, s11, 27 ; GFX1250-NEXT: s_bfe_i64 
s[96:97], s[96:97], 0x10000 +; GFX1250-NEXT: v_writelane_b32 v13, s2, 4 ; GFX1250-NEXT: s_bfe_i64 s[100:101], s[98:99], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s66, s11, 24 -; GFX1250-NEXT: s_lshr_b32 s74, s11, 25 +; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 +; GFX1250-NEXT: v_writelane_b32 v13, s3, 5 +; GFX1250-NEXT: s_lshr_b32 s2, s11, 4 +; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 +; GFX1250-NEXT: v_writelane_b32 v13, s2, 6 +; GFX1250-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s96 -; GFX1250-NEXT: s_lshr_b32 s56, s11, 22 -; GFX1250-NEXT: s_lshr_b32 s62, s11, 23 +; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v12, 0 :: v_dual_mov_b32 v0, s96 +; GFX1250-NEXT: v_writelane_b32 v13, s3, 7 +; GFX1250-NEXT: s_lshr_b32 s24, s11, 16 +; GFX1250-NEXT: s_lshr_b32 s34, s11, 13 +; GFX1250-NEXT: s_lshr_b32 s36, s11, 14 +; GFX1250-NEXT: s_lshr_b32 s38, s11, 15 ; GFX1250-NEXT: v_dual_mov_b32 v1, s97 :: v_dual_mov_b32 v2, s100 -; GFX1250-NEXT: v_dual_mov_b32 v3, s101 :: v_dual_mov_b32 v4, s92 -; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s44, s11, 20 -; GFX1250-NEXT: s_lshr_b32 s52, s11, 21 -; GFX1250-NEXT: s_lshr_b32 s30, s11, 18 -; GFX1250-NEXT: s_lshr_b32 s40, s11, 19 -; GFX1250-NEXT: s_lshr_b32 s18, s11, 16 -; GFX1250-NEXT: s_lshr_b32 s26, s11, 17 -; GFX1250-NEXT: s_lshr_b32 s2, s11, 14 -; GFX1250-NEXT: s_lshr_b32 s4, s11, 15 -; GFX1250-NEXT: v_dual_mov_b32 v5, s93 :: v_dual_mov_b32 v6, s94 -; GFX1250-NEXT: v_dual_mov_b32 v7, s95 :: v_dual_mov_b32 
v10, s78 -; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v3, s101 :: v_dual_mov_b32 v4, s86 ; GFX1250-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s6, s11, 12 -; GFX1250-NEXT: s_lshr_b32 s8, s11, 13 -; GFX1250-NEXT: v_dual_mov_b32 v11, s79 :: v_dual_mov_b32 v12, s88 -; GFX1250-NEXT: v_dual_mov_b32 v13, s89 :: v_dual_mov_b32 v14, s66 -; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s12, s11, 10 -; GFX1250-NEXT: s_lshr_b32 s14, s11, 11 -; GFX1250-NEXT: v_dual_mov_b32 v15, s67 :: v_dual_mov_b32 v16, s74 -; GFX1250-NEXT: v_dual_mov_b32 v17, s75 :: v_dual_mov_b32 v18, s56 -; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s8, s10, 4 +; GFX1250-NEXT: s_lshr_b32 s6, s10, 5 +; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s4, s10, 6 +; GFX1250-NEXT: v_dual_mov_b32 v17, s71 :: v_dual_mov_b32 v18, s58 +; GFX1250-NEXT: s_lshr_b32 s58, s10, 8 +; GFX1250-NEXT: v_dual_mov_b32 v21, s63 :: v_dual_mov_b32 v22, s48 +; GFX1250-NEXT: s_lshr_b32 s48, s10, 10 +; GFX1250-NEXT: v_dual_mov_b32 v27, s41 :: v_dual_mov_b32 v28, s44 +; GFX1250-NEXT: s_lshr_b32 s44, s10, 13 +; GFX1250-NEXT: s_lshr_b32 s26, s11, 17 +; GFX1250-NEXT: s_mov_b32 s42, s11 +; GFX1250-NEXT: v_dual_mov_b32 v5, s87 :: v_dual_mov_b32 v6, s92 +; GFX1250-NEXT: v_dual_mov_b32 v7, s93 :: v_dual_mov_b32 v8, s74 +; GFX1250-NEXT: s_lshr_b32 s2, s10, 7 +; GFX1250-NEXT: v_dual_mov_b32 v23, s49 :: v_dual_mov_b32 v24, s54 +; GFX1250-NEXT: s_lshr_b32 s54, s10, 11 +; GFX1250-NEXT: s_lshr_b32 s30, s11, 12 +; GFX1250-NEXT: v_dual_mov_b32 v9, s75 :: 
v_dual_mov_b32 v10, s82 +; GFX1250-NEXT: v_dual_mov_b32 v11, s83 :: v_dual_mov_b32 v14, s66 +; GFX1250-NEXT: v_dual_mov_b32 v15, s67 :: v_dual_mov_b32 v16, s70 +; GFX1250-NEXT: v_dual_mov_b32 v19, s59 :: v_dual_mov_b32 v20, s62 +; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_dual_mov_b32 v29, s45 :: v_dual_mov_b32 v30, s24 +; GFX1250-NEXT: s_bfe_i64 s[82:83], s[44:45], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[74:75], s[48:49], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[66:67], s[58:59], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[58:59], s[4:5], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[44:45], s[6:7], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[48:49], s[8:9], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[4:5], s[38:39], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[6:7], s[36:37], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[8:9], s[34:35], 0x10000 +; GFX1250-NEXT: v_readlane_b32 s34, v13, 6 +; GFX1250-NEXT: v_readlane_b32 s36, v13, 4 +; GFX1250-NEXT: v_readlane_b32 s38, v13, 2 +; GFX1250-NEXT: s_lshr_b32 s22, s11, 10 +; GFX1250-NEXT: s_lshr_b32 s28, s11, 11 +; GFX1250-NEXT: v_dual_mov_b32 v25, s55 :: v_dual_mov_b32 v26, s40 ; GFX1250-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s16, s11, 8 +; GFX1250-NEXT: s_bfe_i64 s[70:71], s[54:55], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[54:55], s[2:3], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[2:3], s[42:43], 0x10000 +; GFX1250-NEXT: v_readlane_b32 s35, v13, 7 +; GFX1250-NEXT: v_readlane_b32 s37, v13, 5 +; GFX1250-NEXT: v_readlane_b32 s39, v13, 3 +; GFX1250-NEXT: v_readlane_b32 s42, v13, 0 +; GFX1250-NEXT: s_lshr_b32 s18, s11, 8 ; GFX1250-NEXT: s_lshr_b32 s20, s11, 9 -; GFX1250-NEXT: v_dual_mov_b32 v19, s57 :: v_dual_mov_b32 v20, s62 -; GFX1250-NEXT: v_dual_mov_b32 v21, s63 :: v_dual_mov_b32 v22, s44 -; GFX1250-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x10000 -; GFX1250-NEXT: s_bfe_i64 
s[6:7], s[6:7], 0x10000 -; GFX1250-NEXT: s_lshr_b32 s22, s11, 6 -; GFX1250-NEXT: s_lshr_b32 s24, s11, 7 -; GFX1250-NEXT: v_dual_mov_b32 v23, s45 :: v_dual_mov_b32 v24, s52 -; GFX1250-NEXT: v_dual_mov_b32 v25, s53 :: v_dual_mov_b32 v26, s30 -; GFX1250-NEXT: v_dual_mov_b32 v27, s31 :: v_dual_mov_b32 v28, s40 -; GFX1250-NEXT: v_dual_mov_b32 v29, s41 :: v_dual_mov_b32 v30, s18 -; GFX1250-NEXT: v_dual_mov_b32 v31, s19 :: v_dual_mov_b32 v32, s26 +; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 +; GFX1250-NEXT: v_readlane_b32 s43, v13, 1 +; GFX1250-NEXT: s_lshr_b32 s14, s11, 6 +; GFX1250-NEXT: s_lshr_b32 s16, s11, 7 +; GFX1250-NEXT: v_dual_mov_b32 v31, s25 :: v_dual_mov_b32 v32, s26 ; GFX1250-NEXT: v_mov_b32_e32 v33, s27 -; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 ; GFX1250-NEXT: s_clause 0x7 -; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:496 -; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:480 -; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:464 -; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:448 -; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:432 -; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:416 -; GFX1250-NEXT: global_store_b128 v8, v[26:29], s[0:1] offset:400 -; GFX1250-NEXT: global_store_b128 v8, v[30:33], s[0:1] offset:384 +; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:496 +; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:480 +; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:464 +; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:448 +; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:432 +; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:416 +; GFX1250-NEXT: global_store_b128 v12, v[26:29], s[0:1] offset:400 +; GFX1250-NEXT: 
global_store_b128 v12, v[30:33], s[0:1] offset:384 ; GFX1250-NEXT: s_wait_xcnt 0x7 -; GFX1250-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 +; GFX1250-NEXT: v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7 ; GFX1250-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 ; GFX1250-NEXT: s_wait_xcnt 0x6 -; GFX1250-NEXT: v_mov_b32_e32 v4, s6 -; GFX1250-NEXT: s_lshr_b32 s28, s11, 4 -; GFX1250-NEXT: s_lshr_b32 s34, s11, 5 -; GFX1250-NEXT: s_lshr_b32 s36, s11, 2 -; GFX1250-NEXT: s_lshr_b32 s38, s11, 3 +; GFX1250-NEXT: v_mov_b32_e32 v4, s30 +; GFX1250-NEXT: s_lshr_b32 s12, s11, 5 ; GFX1250-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v5, s7 :: v_dual_mov_b32 v6, s8 +; GFX1250-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v5, s31 :: v_dual_mov_b32 v6, s8 ; GFX1250-NEXT: s_wait_xcnt 0x5 -; GFX1250-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v10, s12 -; GFX1250-NEXT: s_lshr_b32 s42, s11, 1 -; GFX1250-NEXT: s_mov_b32 s46, s11 -; GFX1250-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v11, s13 :: v_dual_mov_b32 v12, s14 +; GFX1250-NEXT: v_dual_mov_b32 v7, s9 :: v_dual_mov_b32 v8, s22 +; GFX1250-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v10, s28 ; GFX1250-NEXT: s_wait_xcnt 0x4 -; GFX1250-NEXT: v_dual_mov_b32 v13, s15 :: v_dual_mov_b32 v14, s16 -; GFX1250-NEXT: s_lshr_b32 s48, s10, 30 -; GFX1250-NEXT: s_lshr_b32 s50, s10, 31 -; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v11, s29 :: v_dual_mov_b32 v14, s18 +; GFX1250-NEXT: s_lshr_b32 s94, s10, 30 +; GFX1250-NEXT: s_lshr_b32 s98, s10, 31 +; GFX1250-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x10000 ; GFX1250-NEXT: s_bfe_i64 
s[34:35], s[34:35], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v15, s17 :: v_dual_mov_b32 v16, s20 +; GFX1250-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v15, s19 :: v_dual_mov_b32 v16, s20 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v17, s21 :: v_dual_mov_b32 v18, s22 -; GFX1250-NEXT: s_lshr_b32 s54, s10, 28 -; GFX1250-NEXT: s_lshr_b32 s58, s10, 29 -; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v17, s21 :: v_dual_mov_b32 v18, s14 +; GFX1250-NEXT: s_lshr_b32 s88, s10, 28 +; GFX1250-NEXT: s_lshr_b32 s90, s10, 29 ; GFX1250-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v19, s23 :: v_dual_mov_b32 v20, s24 +; GFX1250-NEXT: v_dual_mov_b32 v19, s15 :: v_dual_mov_b32 v20, s16 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_dual_mov_b32 v21, s25 :: v_dual_mov_b32 v22, s28 -; GFX1250-NEXT: s_lshr_b32 s60, s10, 26 -; GFX1250-NEXT: s_lshr_b32 s64, s10, 27 -; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v23, s29 :: v_dual_mov_b32 v24, s34 -; GFX1250-NEXT: v_mov_b32_e32 v25, s35 +; GFX1250-NEXT: v_dual_mov_b32 v21, s17 :: v_dual_mov_b32 v22, s34 +; GFX1250-NEXT: s_lshr_b32 s80, s10, 26 +; GFX1250-NEXT: s_lshr_b32 s84, s10, 27 +; GFX1250-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[94:95], s[94:95], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v23, s35 :: v_dual_mov_b32 v24, s12 +; GFX1250-NEXT: v_mov_b32_e32 v25, s13 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:368 -; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:352 -; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:336 -; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:320 -; GFX1250-NEXT: 
global_store_b128 v8, v[18:21], s[0:1] offset:304 -; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:288 +; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:368 +; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:352 +; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:336 +; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:320 +; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:304 +; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:288 ; GFX1250-NEXT: s_wait_xcnt 0x5 -; GFX1250-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v1, s37 -; GFX1250-NEXT: v_dual_mov_b32 v2, s38 :: v_dual_mov_b32 v3, s39 +; GFX1250-NEXT: v_dual_mov_b32 v0, s38 :: v_dual_mov_b32 v1, s39 +; GFX1250-NEXT: v_dual_mov_b32 v2, s36 :: v_dual_mov_b32 v3, s37 ; GFX1250-NEXT: s_wait_xcnt 0x4 -; GFX1250-NEXT: v_mov_b32_e32 v4, s46 -; GFX1250-NEXT: s_lshr_b32 s68, s10, 24 -; GFX1250-NEXT: s_lshr_b32 s70, s10, 25 -; GFX1250-NEXT: s_lshr_b32 s72, s10, 22 -; GFX1250-NEXT: s_lshr_b32 s76, s10, 23 -; GFX1250-NEXT: s_bfe_i64 s[58:59], s[58:59], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v5, s47 :: v_dual_mov_b32 v6, s42 +; GFX1250-NEXT: v_mov_b32_e32 v4, s2 +; GFX1250-NEXT: s_lshr_b32 s68, s10, 22 +; GFX1250-NEXT: s_lshr_b32 s72, s10, 23 +; GFX1250-NEXT: s_lshr_b32 s76, s10, 24 +; GFX1250-NEXT: s_lshr_b32 s78, s10, 25 +; GFX1250-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[88:89], s[88:89], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v5, s3 :: v_dual_mov_b32 v6, s42 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v7, s43 :: v_dual_mov_b32 v10, s48 -; GFX1250-NEXT: s_lshr_b32 s80, s10, 20 -; GFX1250-NEXT: s_lshr_b32 s82, s10, 21 -; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v11, s49 :: v_dual_mov_b32 v12, s50 +; GFX1250-NEXT: v_dual_mov_b32 v7, 
s43 :: v_dual_mov_b32 v8, s94 +; GFX1250-NEXT: s_lshr_b32 s60, s10, 20 +; GFX1250-NEXT: s_lshr_b32 s64, s10, 21 +; GFX1250-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v9, s95 :: v_dual_mov_b32 v10, s98 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_dual_mov_b32 v13, s51 :: v_dual_mov_b32 v14, s54 -; GFX1250-NEXT: s_lshr_b32 s84, s10, 18 -; GFX1250-NEXT: s_lshr_b32 s86, s10, 19 +; GFX1250-NEXT: v_dual_mov_b32 v11, s99 :: v_dual_mov_b32 v14, s88 +; GFX1250-NEXT: s_lshr_b32 s52, s10, 18 +; GFX1250-NEXT: s_lshr_b32 s56, s10, 19 +; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[76:77], s[76:77], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[70:71], s[70:71], 0x10000 ; GFX1250-NEXT: s_bfe_i64 s[68:69], s[68:69], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v15, s55 :: v_dual_mov_b32 v16, s58 +; GFX1250-NEXT: v_dual_mov_b32 v15, s89 :: v_dual_mov_b32 v16, s90 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v17, s59 :: v_dual_mov_b32 v18, s60 -; GFX1250-NEXT: s_lshr_b32 s90, s10, 16 -; GFX1250-NEXT: s_lshr_b32 s98, s10, 17 -; GFX1250-NEXT: s_bfe_i64 s[82:83], s[82:83], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[80:81], s[80:81], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v19, s61 :: v_dual_mov_b32 v20, s64 +; GFX1250-NEXT: v_dual_mov_b32 v17, s91 :: v_dual_mov_b32 v18, s80 +; GFX1250-NEXT: s_lshr_b32 s46, s10, 16 +; GFX1250-NEXT: s_lshr_b32 s50, s10, 17 +; GFX1250-NEXT: s_bfe_i64 s[64:65], s[64:65], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[60:61], s[60:61], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v19, s81 :: v_dual_mov_b32 v20, s84 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v21, s65 :: v_dual_mov_b32 v22, s68 -; GFX1250-NEXT: s_lshr_b32 s96, s10, 14 -; GFX1250-NEXT: s_lshr_b32 s100, s10, 15 -; GFX1250-NEXT: s_lshr_b32 s94, s10, 13 -; GFX1250-NEXT: s_lshr_b32 s88, s10, 11 -; GFX1250-NEXT: 
s_lshr_b32 s74, s10, 9 -; GFX1250-NEXT: s_lshr_b32 s62, s10, 7 -; GFX1250-NEXT: s_lshr_b32 s52, s10, 5 -; GFX1250-NEXT: s_lshr_b32 s40, s10, 3 -; GFX1250-NEXT: s_lshr_b32 s26, s10, 1 -; GFX1250-NEXT: s_bfe_i64 s[86:87], s[86:87], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[84:85], s[84:85], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v23, s69 :: v_dual_mov_b32 v24, s70 -; GFX1250-NEXT: v_mov_b32_e32 v25, s71 +; GFX1250-NEXT: v_dual_mov_b32 v21, s85 :: v_dual_mov_b32 v22, s76 +; GFX1250-NEXT: s_lshr_b32 s24, s10, 14 +; GFX1250-NEXT: s_lshr_b32 s26, s10, 15 +; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v23, s77 :: v_dual_mov_b32 v24, s78 +; GFX1250-NEXT: v_mov_b32_e32 v25, s79 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:272 -; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:256 -; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:240 -; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:224 -; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:208 -; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:192 +; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:272 +; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:256 +; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:240 +; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:224 +; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:208 +; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:192 ; GFX1250-NEXT: s_wait_xcnt 0x5 -; GFX1250-NEXT: v_dual_mov_b32 v0, s72 :: v_dual_mov_b32 v1, s73 -; GFX1250-NEXT: v_dual_mov_b32 v2, s76 :: v_dual_mov_b32 v3, s77 +; GFX1250-NEXT: v_dual_mov_b32 v0, s68 :: v_dual_mov_b32 v1, s69 +; GFX1250-NEXT: v_dual_mov_b32 v2, s72 :: v_dual_mov_b32 v3, s73 ; GFX1250-NEXT: s_wait_xcnt 0x4 -; GFX1250-NEXT: v_mov_b32_e32 v4, s80 -; GFX1250-NEXT: s_lshr_b32 s92, 
s10, 12 -; GFX1250-NEXT: s_lshr_b32 s78, s10, 10 -; GFX1250-NEXT: s_bfe_i64 s[98:99], s[98:99], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[90:91], s[90:91], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v5, s81 :: v_dual_mov_b32 v6, s82 +; GFX1250-NEXT: v_mov_b32_e32 v4, s60 +; GFX1250-NEXT: s_lshr_b32 s40, s10, 12 +; GFX1250-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v5, s61 :: v_dual_mov_b32 v6, s64 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v7, s83 :: v_dual_mov_b32 v10, s84 -; GFX1250-NEXT: s_lshr_b32 s66, s10, 8 -; GFX1250-NEXT: s_lshr_b32 s56, s10, 6 -; GFX1250-NEXT: s_lshr_b32 s44, s10, 4 -; GFX1250-NEXT: s_lshr_b32 s30, s10, 2 -; GFX1250-NEXT: s_bfe_i64 s[18:19], s[10:11], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[10:11], s[26:27], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[26:27], s[40:41], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[40:41], s[52:53], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[52:53], s[62:63], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[62:63], s[74:75], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[74:75], s[88:89], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[88:89], s[94:95], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[94:95], s[100:101], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[96:97], s[96:97], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v11, s85 :: v_dual_mov_b32 v12, s86 +; GFX1250-NEXT: v_dual_mov_b32 v7, s65 :: v_dual_mov_b32 v8, s52 +; GFX1250-NEXT: s_lshr_b32 s62, s10, 9 +; GFX1250-NEXT: s_bfe_i64 s[92:93], s[26:27], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[96:97], s[24:25], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v9, s53 :: v_dual_mov_b32 v10, s56 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_dual_mov_b32 v13, s87 :: v_dual_mov_b32 v14, s90 -; GFX1250-NEXT: s_bfe_i64 s[78:79], s[78:79], 0x10000 -; GFX1250-NEXT: s_bfe_i64 s[92:93], s[92:93], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v15, s91 :: v_dual_mov_b32 v16, s98 +; GFX1250-NEXT: v_dual_mov_b32 v11, s57 :: v_dual_mov_b32 v14, s46 +; 
GFX1250-NEXT: s_bfe_i64 s[86:87], s[40:41], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v15, s47 :: v_dual_mov_b32 v16, s50 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v17, s99 :: v_dual_mov_b32 v18, s96 -; GFX1250-NEXT: s_bfe_i64 s[66:67], s[66:67], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v19, s97 :: v_dual_mov_b32 v20, s94 +; GFX1250-NEXT: v_dual_mov_b32 v17, s51 :: v_dual_mov_b32 v18, s96 +; GFX1250-NEXT: s_bfe_i64 s[62:63], s[62:63], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v19, s97 :: v_dual_mov_b32 v20, s92 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v21, s95 :: v_dual_mov_b32 v22, s92 -; GFX1250-NEXT: s_bfe_i64 s[56:57], s[56:57], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v23, s93 :: v_dual_mov_b32 v24, s88 -; GFX1250-NEXT: v_mov_b32_e32 v25, s89 +; GFX1250-NEXT: v_dual_mov_b32 v21, s93 :: v_dual_mov_b32 v22, s86 +; GFX1250-NEXT: s_lshr_b32 s102, s10, 2 +; GFX1250-NEXT: s_lshr_b32 vcc_lo, s10, 3 +; GFX1250-NEXT: v_dual_mov_b32 v23, s87 :: v_dual_mov_b32 v24, s82 +; GFX1250-NEXT: v_mov_b32_e32 v25, s83 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:176 -; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:160 -; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:144 -; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:128 -; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:112 -; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] offset:96 +; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:176 +; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:160 +; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:144 +; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:128 +; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:112 +; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] offset:96 ; GFX1250-NEXT: s_wait_xcnt 0x5 -; GFX1250-NEXT: v_dual_mov_b32 v0, s78 :: v_dual_mov_b32 v1, s79 -; GFX1250-NEXT: 
v_dual_mov_b32 v2, s74 :: v_dual_mov_b32 v3, s75 +; GFX1250-NEXT: v_dual_mov_b32 v0, s74 :: v_dual_mov_b32 v1, s75 +; GFX1250-NEXT: v_dual_mov_b32 v2, s70 :: v_dual_mov_b32 v3, s71 ; GFX1250-NEXT: s_wait_xcnt 0x4 ; GFX1250-NEXT: v_mov_b32_e32 v4, s66 -; GFX1250-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x10000 +; GFX1250-NEXT: s_lshr_b32 s100, s10, 1 ; GFX1250-NEXT: v_dual_mov_b32 v5, s67 :: v_dual_mov_b32 v6, s62 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v10, s56 -; GFX1250-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX1250-NEXT: v_dual_mov_b32 v11, s57 :: v_dual_mov_b32 v12, s52 +; GFX1250-NEXT: v_dual_mov_b32 v7, s63 :: v_dual_mov_b32 v8, s58 +; GFX1250-NEXT: s_bfe_i64 s[26:27], vcc, 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[40:41], s[102:103], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v9, s59 :: v_dual_mov_b32 v10, s54 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_dual_mov_b32 v13, s53 :: v_dual_mov_b32 v14, s44 -; GFX1250-NEXT: v_dual_mov_b32 v15, s45 :: v_dual_mov_b32 v16, s40 +; GFX1250-NEXT: v_dual_mov_b32 v11, s55 :: v_dual_mov_b32 v14, s48 +; GFX1250-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x10000 +; GFX1250-NEXT: s_bfe_i64 s[24:25], s[100:101], 0x10000 +; GFX1250-NEXT: v_dual_mov_b32 v15, s49 :: v_dual_mov_b32 v16, s44 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v17, s41 :: v_dual_mov_b32 v18, s30 -; GFX1250-NEXT: v_dual_mov_b32 v19, s31 :: v_dual_mov_b32 v20, s26 +; GFX1250-NEXT: v_dual_mov_b32 v17, s45 :: v_dual_mov_b32 v18, s40 +; GFX1250-NEXT: v_dual_mov_b32 v19, s41 :: v_dual_mov_b32 v20, s26 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v21, s27 :: v_dual_mov_b32 v22, s18 -; GFX1250-NEXT: v_dual_mov_b32 v23, s19 :: v_dual_mov_b32 v24, s10 -; GFX1250-NEXT: v_mov_b32_e32 v25, s11 +; GFX1250-NEXT: v_dual_mov_b32 v21, s27 :: v_dual_mov_b32 v22, s10 +; GFX1250-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v24, s24 +; GFX1250-NEXT: v_mov_b32_e32 v25, s25 ; GFX1250-NEXT: 
s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:80 -; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] offset:64 -; GFX1250-NEXT: global_store_b128 v8, v[10:13], s[0:1] offset:48 -; GFX1250-NEXT: global_store_b128 v8, v[14:17], s[0:1] offset:32 -; GFX1250-NEXT: global_store_b128 v8, v[18:21], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v8, v[22:25], s[0:1] +; GFX1250-NEXT: global_store_b128 v12, v[0:3], s[0:1] offset:80 +; GFX1250-NEXT: global_store_b128 v12, v[4:7], s[0:1] offset:64 +; GFX1250-NEXT: global_store_b128 v12, v[8:11], s[0:1] offset:48 +; GFX1250-NEXT: global_store_b128 v12, v[14:17], s[0:1] offset:32 +; GFX1250-NEXT: global_store_b128 v12, v[18:21], s[0:1] offset:16 +; GFX1250-NEXT: global_store_b128 v12, v[22:25], s[0:1] ; GFX1250-NEXT: s_endpgm %load = load <64 x i1>, ptr addrspace(4) %in %ext = sext <64 x i1> %load to <64 x i64> diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll index 6f7ee70812264..2e1abfc33115c 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i32.ll @@ -2193,15 +2193,15 @@ define amdgpu_kernel void @constant_sextload_v2i32_to_v2i64(ptr addrspace(1) %ou ; GFX1250-LABEL: constant_sextload_v2i32_to_v2i64: ; GFX1250: ; %bb.0: ; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v4, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v0, s2 -; GFX1250-NEXT: s_ashr_i32 s4, s3, 31 -; GFX1250-NEXT: s_ashr_i32 s5, s2, 31 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v1, s5 -; GFX1250-NEXT: v_mov_b32_e32 v3, s4 +; GFX1250-NEXT: s_ashr_i32 s5, s3, 31 +; GFX1250-NEXT: s_mov_b32 s4, s3 +; GFX1250-NEXT: s_ashr_i32 s3, s2, 31 +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[4:5] +; GFX1250-NEXT: 
v_mov_b64_e32 v[0:1], s[2:3] ; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[0:1] ; GFX1250-NEXT: s_endpgm %ld = load <2 x i32>, ptr addrspace(4) %in @@ -2520,14 +2520,15 @@ define amdgpu_kernel void @constant_sextload_v4i32_to_v4i64(ptr addrspace(1) %ou ; GFX1250-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s6 -; GFX1250-NEXT: s_ashr_i32 s8, s7, 31 -; GFX1250-NEXT: s_ashr_i32 s9, s6, 31 -; GFX1250-NEXT: s_ashr_i32 s2, s5, 31 -; GFX1250-NEXT: s_ashr_i32 s3, s4, 31 -; GFX1250-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v4, s4 -; GFX1250-NEXT: v_dual_mov_b32 v6, s5 :: v_dual_mov_b32 v1, s9 -; GFX1250-NEXT: v_dual_mov_b32 v3, s8 :: v_dual_mov_b32 v5, s3 -; GFX1250-NEXT: v_mov_b32_e32 v7, s2 +; GFX1250-NEXT: s_ashr_i32 s2, s7, 31 +; GFX1250-NEXT: s_mov_b32 s3, s7 +; GFX1250-NEXT: s_ashr_i32 s7, s6, 31 +; GFX1250-NEXT: s_ashr_i32 s8, s5, 31 +; GFX1250-NEXT: s_ashr_i32 s9, s4, 31 +; GFX1250-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v1, s7 +; GFX1250-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v3, s2 +; GFX1250-NEXT: v_dual_mov_b32 v5, s9 :: v_dual_mov_b32 v6, s5 +; GFX1250-NEXT: v_mov_b32_e32 v7, s8 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b128 v8, v[0:3], s[0:1] offset:16 ; GFX1250-NEXT: global_store_b128 v8, v[4:7], s[0:1] @@ -3025,32 +3026,32 @@ define amdgpu_kernel void @constant_sextload_v8i32_to_v8i64(ptr addrspace(1) %ou ; ; GFX1250-LABEL: constant_sextload_v8i32_to_v8i64: ; GFX1250: ; %bb.0: -; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_load_b256 s[4:11], s[2:3], 0x0 +; GFX1250-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v0, s10 -; GFX1250-NEXT: s_ashr_i32 s16, s11, 31 -; GFX1250-NEXT: s_ashr_i32 s17, s10, 31 -; GFX1250-NEXT: s_ashr_i32 s14, s9, 31 -; GFX1250-NEXT: 
s_ashr_i32 s15, s8, 31 -; GFX1250-NEXT: s_ashr_i32 s12, s7, 31 -; GFX1250-NEXT: s_ashr_i32 s13, s6, 31 -; GFX1250-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v4, s8 -; GFX1250-NEXT: v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v1, s17 -; GFX1250-NEXT: v_dual_mov_b32 v3, s16 :: v_dual_mov_b32 v5, s15 -; GFX1250-NEXT: s_ashr_i32 s2, s5, 31 -; GFX1250-NEXT: s_ashr_i32 s3, s4, 31 -; GFX1250-NEXT: v_dual_mov_b32 v6, s9 :: v_dual_mov_b32 v8, s6 -; GFX1250-NEXT: v_dual_mov_b32 v7, s14 :: v_dual_mov_b32 v9, s13 -; GFX1250-NEXT: v_dual_mov_b32 v10, s7 :: v_dual_mov_b32 v12, s4 -; GFX1250-NEXT: v_dual_mov_b32 v11, s12 :: v_dual_mov_b32 v13, s3 -; GFX1250-NEXT: v_mov_b32_e32 v15, s2 +; GFX1250-NEXT: v_dual_mov_b32 v16, 0 :: v_dual_mov_b32 v0, s6 +; GFX1250-NEXT: s_ashr_i32 s10, s7, 31 +; GFX1250-NEXT: s_ashr_i32 s11, s6, 31 +; GFX1250-NEXT: s_ashr_i32 s12, s5, 31 +; GFX1250-NEXT: s_ashr_i32 s13, s4, 31 +; GFX1250-NEXT: s_ashr_i32 s14, s3, 31 +; GFX1250-NEXT: s_ashr_i32 s15, s2, 31 +; GFX1250-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v1, s11 +; GFX1250-NEXT: v_dual_mov_b32 v2, s7 :: v_dual_mov_b32 v3, s10 +; GFX1250-NEXT: s_ashr_i32 s16, s1, 31 +; GFX1250-NEXT: s_ashr_i32 s17, s0, 31 +; GFX1250-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v8, s2 +; GFX1250-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v6, s5 +; GFX1250-NEXT: v_dual_mov_b32 v7, s12 :: v_dual_mov_b32 v9, s15 +; GFX1250-NEXT: v_dual_mov_b32 v10, s3 :: v_dual_mov_b32 v11, s14 +; GFX1250-NEXT: v_dual_mov_b32 v13, s17 :: v_dual_mov_b32 v14, s1 +; GFX1250-NEXT: v_mov_b32_e32 v15, s16 ; GFX1250-NEXT: s_clause 0x3 -; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[0:1] offset:48 -; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[0:1] offset:32 -; GFX1250-NEXT: global_store_b128 v16, v[8:11], s[0:1] offset:16 -; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[0:1] +; GFX1250-NEXT: global_store_b128 v16, v[0:3], s[8:9] offset:48 +; GFX1250-NEXT: global_store_b128 v16, v[4:7], s[8:9] offset:32 +; GFX1250-NEXT: 
global_store_b128 v16, v[8:11], s[8:9] offset:16 +; GFX1250-NEXT: global_store_b128 v16, v[12:15], s[8:9] ; GFX1250-NEXT: s_endpgm %ld = load <8 x i32>, ptr addrspace(4) %in %ext = sext <8 x i32> %ld to <8 x i64> @@ -3536,50 +3537,51 @@ define amdgpu_kernel void @constant_sextload_v16i32_to_v16i64(ptr addrspace(1) % ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_load_b512 s[0:15], s[18:19], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v28, 0 :: v_dual_mov_b32 v0, s14 -; GFX1250-NEXT: s_ashr_i32 s28, s11, 31 -; GFX1250-NEXT: s_ashr_i32 s29, s10, 31 -; GFX1250-NEXT: s_ashr_i32 s30, s13, 31 -; GFX1250-NEXT: s_ashr_i32 s33, s15, 31 -; GFX1250-NEXT: s_ashr_i32 s34, s14, 31 -; GFX1250-NEXT: s_ashr_i32 s26, s9, 31 -; GFX1250-NEXT: s_ashr_i32 s27, s8, 31 -; GFX1250-NEXT: s_ashr_i32 s31, s12, 31 -; GFX1250-NEXT: s_ashr_i32 s24, s7, 31 -; GFX1250-NEXT: s_ashr_i32 s25, s6, 31 -; GFX1250-NEXT: v_dual_mov_b32 v2, s15 :: v_dual_mov_b32 v4, s12 -; GFX1250-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v8, s10 -; GFX1250-NEXT: v_dual_mov_b32 v10, s11 :: v_dual_mov_b32 v12, s8 -; GFX1250-NEXT: v_dual_mov_b32 v26, s3 :: v_dual_mov_b32 v1, s34 -; GFX1250-NEXT: v_dual_mov_b32 v3, s33 :: v_dual_mov_b32 v5, s31 -; GFX1250-NEXT: v_dual_mov_b32 v7, s30 :: v_dual_mov_b32 v9, s29 -; GFX1250-NEXT: v_dual_mov_b32 v11, s28 :: v_dual_mov_b32 v13, s27 -; GFX1250-NEXT: s_ashr_i32 s22, s5, 31 -; GFX1250-NEXT: s_ashr_i32 s23, s4, 31 -; GFX1250-NEXT: v_dual_mov_b32 v14, s9 :: v_dual_mov_b32 v16, s6 -; GFX1250-NEXT: v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v17, s25 -; GFX1250-NEXT: s_ashr_i32 s20, s3, 31 -; GFX1250-NEXT: s_ashr_i32 s21, s2, 31 -; GFX1250-NEXT: v_dual_mov_b32 v18, s7 :: v_dual_mov_b32 v20, s4 -; GFX1250-NEXT: v_dual_mov_b32 v19, s24 :: v_dual_mov_b32 v21, s23 -; GFX1250-NEXT: s_ashr_i32 s18, s1, 31 -; GFX1250-NEXT: s_ashr_i32 s19, s0, 31 -; GFX1250-NEXT: v_dual_mov_b32 v22, s5 :: v_dual_mov_b32 v24, s2 -; GFX1250-NEXT: v_dual_mov_b32 v23, s22 :: 
v_dual_mov_b32 v25, s21 -; GFX1250-NEXT: v_mov_b32_e32 v27, s20 +; GFX1250-NEXT: v_dual_mov_b32 v24, 0 :: v_dual_mov_b32 v0, s14 +; GFX1250-NEXT: s_ashr_i32 s18, s15, 31 +; GFX1250-NEXT: s_ashr_i32 s19, s14, 31 +; GFX1250-NEXT: s_ashr_i32 s20, s13, 31 +; GFX1250-NEXT: s_ashr_i32 s21, s12, 31 +; GFX1250-NEXT: s_ashr_i32 s22, s11, 31 +; GFX1250-NEXT: s_ashr_i32 s23, s10, 31 +; GFX1250-NEXT: v_dual_mov_b32 v1, s19 :: v_dual_mov_b32 v2, s15 +; GFX1250-NEXT: v_dual_mov_b32 v3, s18 :: v_dual_mov_b32 v5, s21 +; GFX1250-NEXT: s_ashr_i32 s24, s9, 31 +; GFX1250-NEXT: s_ashr_i32 s25, s8, 31 +; GFX1250-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v8, s10 +; GFX1250-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v7, s20 +; GFX1250-NEXT: s_ashr_i32 s26, s7, 31 +; GFX1250-NEXT: s_ashr_i32 s27, s6, 31 +; GFX1250-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v10, s11 +; GFX1250-NEXT: v_dual_mov_b32 v11, s22 :: v_dual_mov_b32 v13, s25 +; GFX1250-NEXT: s_ashr_i32 s28, s5, 31 +; GFX1250-NEXT: s_ashr_i32 s29, s4, 31 +; GFX1250-NEXT: v_dual_mov_b32 v12, s8 :: v_dual_mov_b32 v16, s6 +; GFX1250-NEXT: v_dual_mov_b32 v14, s9 :: v_dual_mov_b32 v15, s24 +; GFX1250-NEXT: s_ashr_i32 s30, s3, 31 +; GFX1250-NEXT: s_ashr_i32 s31, s2, 31 +; GFX1250-NEXT: v_dual_mov_b32 v17, s27 :: v_dual_mov_b32 v18, s7 +; GFX1250-NEXT: v_mov_b32_e32 v19, s26 +; GFX1250-NEXT: s_ashr_i32 s33, s1, 31 +; GFX1250-NEXT: s_ashr_i32 s34, s0, 31 ; GFX1250-NEXT: s_clause 0x1 -; GFX1250-NEXT: global_store_b128 v28, v[0:3], s[16:17] offset:112 -; GFX1250-NEXT: global_store_b128 v28, v[4:7], s[16:17] offset:96 +; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:112 +; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:96 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s19 -; GFX1250-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v3, s18 +; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s29 +; GFX1250-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_mov_b32 
v3, s28 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s31 +; GFX1250-NEXT: v_dual_mov_b32 v6, s3 :: v_dual_mov_b32 v7, s30 +; GFX1250-NEXT: v_dual_mov_b32 v20, s0 :: v_dual_mov_b32 v21, s34 +; GFX1250-NEXT: v_dual_mov_b32 v22, s1 :: v_dual_mov_b32 v23, s33 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v28, v[8:11], s[16:17] offset:80 -; GFX1250-NEXT: global_store_b128 v28, v[12:15], s[16:17] offset:64 -; GFX1250-NEXT: global_store_b128 v28, v[16:19], s[16:17] offset:48 -; GFX1250-NEXT: global_store_b128 v28, v[20:23], s[16:17] offset:32 -; GFX1250-NEXT: global_store_b128 v28, v[24:27], s[16:17] offset:16 -; GFX1250-NEXT: global_store_b128 v28, v[0:3], s[16:17] +; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[16:17] offset:80 +; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[16:17] offset:64 +; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[16:17] offset:48 +; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[16:17] offset:32 +; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[16:17] offset:16 +; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[16:17] ; GFX1250-NEXT: s_endpgm %ld = load <16 x i32>, ptr addrspace(4) %in %ext = sext <16 x i32> %ld to <16 x i64> @@ -4873,106 +4875,105 @@ define amdgpu_kernel void @constant_sextload_v32i32_to_v32i64(ptr addrspace(1) % ; GFX1250-NEXT: s_load_b128 s[36:39], s[4:5], 0x24 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_clause 0x1 -; GFX1250-NEXT: s_load_b512 s[0:15], s[38:39], 0x0 ; GFX1250-NEXT: s_load_b512 s[16:31], s[38:39], 0x40 -; GFX1250-NEXT: v_mov_b32_e32 v24, 0 +; GFX1250-NEXT: s_load_b512 s[0:15], s[38:39], 0x0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_ashr_i32 s49, s15, 31 -; GFX1250-NEXT: s_ashr_i32 s64, s31, 31 -; GFX1250-NEXT: s_ashr_i32 s65, s30, 31 -; GFX1250-NEXT: s_ashr_i32 s62, s29, 31 -; GFX1250-NEXT: s_ashr_i32 s63, s28, 31 -; GFX1250-NEXT: s_ashr_i32 s60, s27, 31 -; GFX1250-NEXT: s_ashr_i32 s61, s26, 31 -; GFX1250-NEXT: 
v_dual_mov_b32 v0, s30 :: v_dual_mov_b32 v2, s31 -; GFX1250-NEXT: v_dual_mov_b32 v4, s28 :: v_dual_mov_b32 v1, s65 -; GFX1250-NEXT: v_mov_b32_e32 v3, s64 -; GFX1250-NEXT: s_ashr_i32 s58, s25, 31 -; GFX1250-NEXT: s_ashr_i32 s59, s24, 31 -; GFX1250-NEXT: v_dual_mov_b32 v6, s29 :: v_dual_mov_b32 v8, s26 -; GFX1250-NEXT: v_dual_mov_b32 v5, s63 :: v_dual_mov_b32 v7, s62 -; GFX1250-NEXT: v_dual_mov_b32 v9, s61 :: v_dual_mov_b32 v10, s27 -; GFX1250-NEXT: v_dual_mov_b32 v11, s60 :: v_dual_mov_b32 v12, s24 -; GFX1250-NEXT: s_ashr_i32 s57, s23, 31 -; GFX1250-NEXT: v_dual_mov_b32 v13, s59 :: v_dual_mov_b32 v14, s25 -; GFX1250-NEXT: v_mov_b32_e32 v15, s58 -; GFX1250-NEXT: s_ashr_i32 s24, s22, 31 -; GFX1250-NEXT: s_ashr_i32 s55, s21, 31 -; GFX1250-NEXT: s_ashr_i32 s56, s20, 31 -; GFX1250-NEXT: s_ashr_i32 s53, s19, 31 -; GFX1250-NEXT: s_ashr_i32 s54, s18, 31 +; GFX1250-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v2, s30 +; GFX1250-NEXT: s_ashr_i32 s51, s31, 31 +; GFX1250-NEXT: s_ashr_i32 s52, s30, 31 +; GFX1250-NEXT: s_ashr_i32 s53, s29, 31 +; GFX1250-NEXT: s_ashr_i32 s54, s28, 31 +; GFX1250-NEXT: s_ashr_i32 s55, s27, 31 +; GFX1250-NEXT: s_ashr_i32 s56, s26, 31 +; GFX1250-NEXT: v_dual_mov_b32 v3, s52 :: v_dual_mov_b32 v4, s31 +; GFX1250-NEXT: v_dual_mov_b32 v5, s51 :: v_dual_mov_b32 v6, s28 +; GFX1250-NEXT: s_ashr_i32 s57, s25, 31 +; GFX1250-NEXT: s_ashr_i32 s58, s24, 31 +; GFX1250-NEXT: v_dual_mov_b32 v7, s54 :: v_dual_mov_b32 v8, s29 +; GFX1250-NEXT: v_dual_mov_b32 v9, s53 :: v_dual_mov_b32 v10, s26 +; GFX1250-NEXT: v_dual_mov_b32 v11, s56 :: v_dual_mov_b32 v12, s27 +; GFX1250-NEXT: v_dual_mov_b32 v13, s55 :: v_dual_mov_b32 v14, s24 +; GFX1250-NEXT: s_ashr_i32 s59, s23, 31 +; GFX1250-NEXT: s_ashr_i32 s60, s22, 31 +; GFX1250-NEXT: v_dual_mov_b32 v15, s58 :: v_dual_mov_b32 v16, s25 +; GFX1250-NEXT: v_mov_b32_e32 v17, s57 +; GFX1250-NEXT: s_ashr_i32 s61, s21, 31 +; GFX1250-NEXT: s_ashr_i32 s62, s20, 31 +; GFX1250-NEXT: s_ashr_i32 s63, s19, 31 +; GFX1250-NEXT: s_ashr_i32 s64, 
s18, 31 ; GFX1250-NEXT: s_clause 0x3 -; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:240 -; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:224 -; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:208 -; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:192 +; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[36:37] offset:240 +; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[36:37] offset:224 +; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[36:37] offset:208 +; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[36:37] offset:192 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v0, s22 :: v_dual_mov_b32 v1, s24 -; GFX1250-NEXT: v_dual_mov_b32 v2, s23 :: v_dual_mov_b32 v3, s57 +; GFX1250-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s60 +; GFX1250-NEXT: v_dual_mov_b32 v4, s23 :: v_dual_mov_b32 v5, s59 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_mov_b32_e32 v4, s20 -; GFX1250-NEXT: s_ashr_i32 s51, s17, 31 -; GFX1250-NEXT: s_ashr_i32 s52, s16, 31 -; GFX1250-NEXT: v_dual_mov_b32 v5, s56 :: v_dual_mov_b32 v6, s21 +; GFX1250-NEXT: v_mov_b32_e32 v6, s20 +; GFX1250-NEXT: s_ashr_i32 s65, s17, 31 +; GFX1250-NEXT: s_ashr_i32 s24, s16, 31 +; GFX1250-NEXT: v_dual_mov_b32 v7, s62 :: v_dual_mov_b32 v8, s21 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v7, s55 :: v_dual_mov_b32 v8, s18 -; GFX1250-NEXT: s_ashr_i32 s50, s14, 31 -; GFX1250-NEXT: v_dual_mov_b32 v9, s54 :: v_dual_mov_b32 v10, s19 +; GFX1250-NEXT: v_dual_mov_b32 v9, s61 :: v_dual_mov_b32 v10, s18 +; GFX1250-NEXT: s_ashr_i32 s33, s15, 31 +; GFX1250-NEXT: s_ashr_i32 s34, s14, 31 +; GFX1250-NEXT: v_dual_mov_b32 v11, s64 :: v_dual_mov_b32 v12, s19 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v11, s53 :: v_dual_mov_b32 v12, s16 -; GFX1250-NEXT: s_ashr_i32 s45, s11, 31 -; GFX1250-NEXT: s_ashr_i32 s46, s10, 31 -; GFX1250-NEXT: s_ashr_i32 s47, s13, 31 -; GFX1250-NEXT: s_ashr_i32 s48, s12, 31 -; GFX1250-NEXT: 
v_dual_mov_b32 v13, s52 :: v_dual_mov_b32 v14, s17 -; GFX1250-NEXT: v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v16, s14 -; GFX1250-NEXT: s_ashr_i32 s43, s9, 31 -; GFX1250-NEXT: s_ashr_i32 s44, s8, 31 -; GFX1250-NEXT: v_dual_mov_b32 v17, s50 :: v_dual_mov_b32 v18, s15 -; GFX1250-NEXT: v_dual_mov_b32 v19, s49 :: v_dual_mov_b32 v20, s12 -; GFX1250-NEXT: s_ashr_i32 s41, s7, 31 -; GFX1250-NEXT: s_ashr_i32 s42, s6, 31 -; GFX1250-NEXT: v_dual_mov_b32 v21, s48 :: v_dual_mov_b32 v22, s13 -; GFX1250-NEXT: v_mov_b32_e32 v23, s47 +; GFX1250-NEXT: v_dual_mov_b32 v13, s63 :: v_dual_mov_b32 v14, s16 +; GFX1250-NEXT: s_ashr_i32 s35, s13, 31 +; GFX1250-NEXT: s_ashr_i32 s38, s12, 31 +; GFX1250-NEXT: s_ashr_i32 s39, s11, 31 +; GFX1250-NEXT: s_ashr_i32 s40, s10, 31 +; GFX1250-NEXT: v_dual_mov_b32 v15, s24 :: v_dual_mov_b32 v16, s17 +; GFX1250-NEXT: v_dual_mov_b32 v17, s65 :: v_dual_mov_b32 v18, s14 +; GFX1250-NEXT: s_ashr_i32 s41, s9, 31 +; GFX1250-NEXT: s_ashr_i32 s42, s8, 31 +; GFX1250-NEXT: v_dual_mov_b32 v19, s34 :: v_dual_mov_b32 v20, s15 +; GFX1250-NEXT: v_dual_mov_b32 v21, s33 :: v_dual_mov_b32 v22, s12 +; GFX1250-NEXT: s_ashr_i32 s43, s7, 31 +; GFX1250-NEXT: s_ashr_i32 s44, s6, 31 +; GFX1250-NEXT: v_dual_mov_b32 v23, s38 :: v_dual_mov_b32 v24, s13 +; GFX1250-NEXT: v_mov_b32_e32 v25, s35 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:176 -; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:160 -; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:144 -; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:128 -; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:112 -; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[36:37] offset:96 +; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[36:37] offset:176 +; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[36:37] offset:160 +; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[36:37] offset:144 +; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[36:37] 
offset:128 +; GFX1250-NEXT: global_store_b128 v0, v[18:21], s[36:37] offset:112 +; GFX1250-NEXT: global_store_b128 v0, v[22:25], s[36:37] offset:96 ; GFX1250-NEXT: s_wait_xcnt 0x5 -; GFX1250-NEXT: v_dual_mov_b32 v0, s10 :: v_dual_mov_b32 v1, s46 -; GFX1250-NEXT: v_dual_mov_b32 v2, s11 :: v_dual_mov_b32 v3, s45 +; GFX1250-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s40 +; GFX1250-NEXT: v_dual_mov_b32 v4, s11 :: v_dual_mov_b32 v5, s39 ; GFX1250-NEXT: s_wait_xcnt 0x4 -; GFX1250-NEXT: v_mov_b32_e32 v4, s8 -; GFX1250-NEXT: s_ashr_i32 s39, s5, 31 -; GFX1250-NEXT: s_ashr_i32 s40, s4, 31 -; GFX1250-NEXT: v_dual_mov_b32 v5, s44 :: v_dual_mov_b32 v6, s9 +; GFX1250-NEXT: v_mov_b32_e32 v6, s8 +; GFX1250-NEXT: s_ashr_i32 s45, s5, 31 +; GFX1250-NEXT: s_ashr_i32 s46, s4, 31 +; GFX1250-NEXT: v_dual_mov_b32 v7, s42 :: v_dual_mov_b32 v8, s9 ; GFX1250-NEXT: s_wait_xcnt 0x3 -; GFX1250-NEXT: v_dual_mov_b32 v7, s43 :: v_dual_mov_b32 v8, s6 -; GFX1250-NEXT: s_ashr_i32 s35, s3, 31 -; GFX1250-NEXT: s_ashr_i32 s38, s2, 31 -; GFX1250-NEXT: v_dual_mov_b32 v9, s42 :: v_dual_mov_b32 v10, s7 +; GFX1250-NEXT: v_dual_mov_b32 v9, s41 :: v_dual_mov_b32 v10, s6 +; GFX1250-NEXT: s_ashr_i32 s47, s3, 31 +; GFX1250-NEXT: s_ashr_i32 s48, s2, 31 +; GFX1250-NEXT: v_dual_mov_b32 v11, s44 :: v_dual_mov_b32 v12, s7 ; GFX1250-NEXT: s_wait_xcnt 0x2 -; GFX1250-NEXT: v_dual_mov_b32 v11, s41 :: v_dual_mov_b32 v12, s4 -; GFX1250-NEXT: s_ashr_i32 s33, s1, 31 -; GFX1250-NEXT: s_ashr_i32 s34, s0, 31 -; GFX1250-NEXT: v_dual_mov_b32 v13, s40 :: v_dual_mov_b32 v14, s5 +; GFX1250-NEXT: v_dual_mov_b32 v13, s43 :: v_dual_mov_b32 v14, s4 +; GFX1250-NEXT: s_ashr_i32 s49, s1, 31 +; GFX1250-NEXT: s_ashr_i32 s50, s0, 31 +; GFX1250-NEXT: v_dual_mov_b32 v15, s46 :: v_dual_mov_b32 v16, s5 ; GFX1250-NEXT: s_wait_xcnt 0x1 -; GFX1250-NEXT: v_dual_mov_b32 v15, s39 :: v_dual_mov_b32 v16, s2 -; GFX1250-NEXT: v_dual_mov_b32 v17, s38 :: v_dual_mov_b32 v18, s3 +; GFX1250-NEXT: v_dual_mov_b32 v17, s45 :: v_dual_mov_b32 v18, s2 +; 
GFX1250-NEXT: v_dual_mov_b32 v19, s48 :: v_dual_mov_b32 v20, s3 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: v_dual_mov_b32 v19, s35 :: v_dual_mov_b32 v20, s0 -; GFX1250-NEXT: v_dual_mov_b32 v21, s34 :: v_dual_mov_b32 v22, s1 -; GFX1250-NEXT: v_mov_b32_e32 v23, s33 +; GFX1250-NEXT: v_dual_mov_b32 v21, s47 :: v_dual_mov_b32 v22, s0 +; GFX1250-NEXT: v_dual_mov_b32 v23, s50 :: v_dual_mov_b32 v24, s1 +; GFX1250-NEXT: v_mov_b32_e32 v25, s49 ; GFX1250-NEXT: s_clause 0x5 -; GFX1250-NEXT: global_store_b128 v24, v[0:3], s[36:37] offset:80 -; GFX1250-NEXT: global_store_b128 v24, v[4:7], s[36:37] offset:64 -; GFX1250-NEXT: global_store_b128 v24, v[8:11], s[36:37] offset:48 -; GFX1250-NEXT: global_store_b128 v24, v[12:15], s[36:37] offset:32 -; GFX1250-NEXT: global_store_b128 v24, v[16:19], s[36:37] offset:16 -; GFX1250-NEXT: global_store_b128 v24, v[20:23], s[36:37] +; GFX1250-NEXT: global_store_b128 v0, v[2:5], s[36:37] offset:80 +; GFX1250-NEXT: global_store_b128 v0, v[6:9], s[36:37] offset:64 +; GFX1250-NEXT: global_store_b128 v0, v[10:13], s[36:37] offset:48 +; GFX1250-NEXT: global_store_b128 v0, v[14:17], s[36:37] offset:32 +; GFX1250-NEXT: global_store_b128 v0, v[18:21], s[36:37] offset:16 +; GFX1250-NEXT: global_store_b128 v0, v[22:25], s[36:37] ; GFX1250-NEXT: s_endpgm %ld = load <32 x i32>, ptr addrspace(4) %in %ext = sext <32 x i32> %ld to <32 x i64> diff --git a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll index deb97a9812b42..b884689051f5b 100644 --- a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll +++ b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll @@ -7,11 +7,11 @@ define <2 x i32> @uniform_masked_load_ptr1_mask_v2i32(ptr addrspace(1) inreg noc ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 
0 ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB0_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] ; GFX942-NEXT: .LBB0_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] @@ -30,13 +30,12 @@ define <4 x i32> @uniform_masked_load_ptr1_mask_v4i32(ptr addrspace(1) inreg noc ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB1_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX942-NEXT: .LBB1_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] @@ -55,13 +54,12 @@ define <4 x float> @uniform_masked_load_ptr1_mask_v4f32(ptr addrspace(1) inreg n ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB2_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX942-NEXT: .LBB2_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] @@ -80,20 +78,16 @@ define <8 x i32> @uniform_masked_load_ptr1_mask_v8i32(ptr addrspace(1) inreg noc ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: 
v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB3_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load -; GFX942-NEXT: global_load_dwordx4 v[4:7], v0, s[0:1] offset:16 -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] +; GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16 +; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] ; GFX942-NEXT: .LBB3_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-NEXT: s_waitcnt vmcnt(0) @@ -111,20 +105,16 @@ define <8 x float> @uniform_masked_load_ptr1_mask_v8f32(ptr addrspace(1) inreg n ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB4_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load -; GFX942-NEXT: global_load_dwordx4 v[4:7], v0, s[0:1] offset:16 -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] +; 
GFX942-NEXT: v_mov_b32_e32 v8, 0 +; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16 +; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1] ; GFX942-NEXT: .LBB4_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX942-NEXT: s_waitcnt vmcnt(0) @@ -142,13 +132,12 @@ define <8 x i16> @uniform_masked_load_ptr1_mask_v8i16(ptr addrspace(1) inreg noc ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB5_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX942-NEXT: .LBB5_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] @@ -167,13 +156,12 @@ define <8 x half> @uniform_masked_load_ptr1_mask_v8f16(ptr addrspace(1) inreg no ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB6_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX942-NEXT: .LBB6_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] @@ -192,13 +180,12 @@ define <8 x bfloat> @uniform_masked_load_ptr1_mask_v8bf16(ptr addrspace(1) inreg ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; 
GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX942-NEXT: s_cbranch_execz .LBB7_2 ; GFX942-NEXT: ; %bb.1: ; %cond.load +; GFX942-NEXT: v_mov_b32_e32 v0, 0 ; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1] ; GFX942-NEXT: .LBB7_2: ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll index 4f73e8e9c1883..64c602f81cb23 100644 --- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll +++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll @@ -5,11 +5,11 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck 
-check-prefixes=GFX950,GFX950-GISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s @@ -62,21 +62,37 @@ define half @v_maximumnum_f16(half %x, half %y) { ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f16: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: 
v_maximumnum_f16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f16: ; GFX10-SDAG: ; %bb.0: @@ -211,11 +227,17 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) { ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f16_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f16_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f16_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f16_nnan: ; GFX10: ; %bb.0: @@ -283,12 +305,19 @@ define half @v_maximumnum_f16_1.0(half %x) { ; GFX8-NEXT: v_max_f16_e32 v0, 1.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f16_1.0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f16_e32 v0, 1.0, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f16_1.0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f16_e32 v0, 1.0, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f16_1.0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f16_e32 v0, 1.0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: 
v_maximumnum_f16_1.0: ; GFX10: ; %bb.0: @@ -373,21 +402,37 @@ define float @v_maximumnum_f32(float %x, float %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f32: ; GFX10-SDAG: ; %bb.0: @@ -461,11 +506,17 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_nnan: ; GFX10: ; %bb.0: @@ -525,21 +576,37 @@ define double @v_maximumnum_f64(double %x, double %y) { ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f64: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f64: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; 
GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f64: ; GFX10-SDAG: ; %bb.0: @@ -617,11 +684,17 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) { ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f64_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f64_nnan: ; GFX10: ; %bb.0: @@ -663,12 +736,19 @@ define float @v_maximumnum_f32_1.0(float 
%x) { ; GFX8-NEXT: v_max_f32_e32 v0, 1.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_1.0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, 1.0, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_1.0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f32_e32 v0, 1.0, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_1.0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f32_e32 v0, 1.0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_1.0: ; GFX10: ; %bb.0: @@ -717,13 +797,21 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_rhs_not_snan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_rhs_not_snan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_rhs_not_snan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_rhs_not_snan: ; GFX10: ; %bb.0: @@ -774,13 +862,21 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) { ; 
GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_lhs_not_snan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_lhs_not_snan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_lhs_not_snan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_lhs_not_snan: ; GFX10: ; %bb.0: @@ -831,13 +927,21 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_both_operands_not_snan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_both_operands_not_snan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_both_operands_not_snan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 +; 
GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_both_operands_not_snan: ; GFX10: ; %bb.0: @@ -887,12 +991,19 @@ define double @v_maximumnum_f64_1.0(double %x) { ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 1.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f64_1.0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], 1.0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f64_1.0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], 1.0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f64_1.0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], 1.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f64_1.0: ; GFX10: ; %bb.0: @@ -2190,21 +2301,37 @@ define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: 
v_max_f32_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2303,21 +2430,37 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 -; 
GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2417,21 +2560,37 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs: -; GFX9-GISEL: ; %bb.0: -; 
GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_fabs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_fabs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs: ; GFX10-SDAG: ; %bb.0: @@ -2531,21 +2690,37 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 -; 
GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX900-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f32_fneg: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX950-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f32_fneg: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg: ; GFX10-SDAG: ; %bb.0: @@ -2648,21 +2823,37 @@ define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) { ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; 
GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2808,21 +2999,37 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half 
%y) { ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX950-GISEL-NEXT: v_max_f16_e32 
v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2969,21 +3176,37 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) { ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_fabs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_fabs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs: ; GFX10-SDAG: ; %bb.0: @@ -3130,21 +3353,37 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) { ; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX900-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_f16_fneg: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX950-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1 +; 
GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f16_fneg: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg: ; GFX10-SDAG: ; %bb.0: @@ -3288,21 +3527,37 @@ define double @v_maximumnum_f64_fneg(double %x, double %y) { ; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_f64_fneg: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_f64_fneg: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_f64_fneg: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: 
v_maximumnum_f64_fneg: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_f64_fneg: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_f64_fneg: ; GFX10-SDAG: ; %bb.0: @@ -3564,11 +3819,17 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v2f16_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v2f16_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v2f16_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v2f16_nnan: ; GFX10: ; %bb.0: @@ -3663,16 +3924,16 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_v3f16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 -; 
GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_v3f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: v_maximumnum_v3f16: ; GFX900-GISEL: ; %bb.0: @@ -3685,6 +3946,17 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) { ; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v2 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX950-SDAG-LABEL: v_maximumnum_v3f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX950-GISEL-LABEL: v_maximumnum_v3f16: ; GFX950-GISEL: ; %bb.0: ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3834,19 +4106,33 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_v3f16_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v3f16_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v3f16_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX950-GISEL-LABEL: v_maximumnum_v3f16_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v3f16_nnan: ; GFX10: ; %bb.0: @@ -4157,12 +4443,19 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v4f16_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v4f16_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v4f16_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v4f16_nnan: ; GFX10: ; %bb.0: @@ -6691,27 +6984,49 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_v2f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_v2f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_v2f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v2f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: 
v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v2f32: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v2f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_v2f32: ; GFX10-SDAG: ; %bb.0: @@ -6797,12 +7112,19 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { ; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v2f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v2f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v2f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v2 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX950-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v2f32_nnan: ; GFX10: ; %bb.0: @@ -6887,33 +7209,61 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_v3f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_v3f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_v3f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX900-GISEL-LABEL: v_maximumnum_v3f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v3f32: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v3f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_v3f32: ; GFX10-SDAG: ; %bb.0: @@ -7015,13 +7365,21 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { ; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 ; GFX8-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v3f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v3f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v3f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v3 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX950-NEXT: v_max_f32_e32 v2, v2, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v3f32_nnan: ; GFX10: ; %bb.0: @@ -7121,39 +7479,73 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) { ; GFX8-GISEL-NEXT: v_max_f32_e32 v3, v3, v4 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_v4f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_maximumnum_v4f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 -; 
GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v4 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_v4f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v4f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v4f32: +; 
GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v4f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v4 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_maximumnum_v4f32: ; GFX10-SDAG: ; %bb.0: @@ -7267,14 +7659,23 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { ; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v4f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_max_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_max_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: 
v_maximumnum_v4f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v4f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v4 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v5 +; GFX950-NEXT: v_max_f32_e32 v2, v2, v6 +; GFX950-NEXT: v_max_f32_e32 v3, v3, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v4f32_nnan: ; GFX10: ; %bb.0: @@ -7376,12 +7777,12 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) { ; GFX950-SDAG-LABEL: v_maximumnum_v2f64: ; GFX950-SDAG: ; %bb.0: ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5] ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-GISEL-LABEL: v_maximumnum_v2f64: @@ -7491,12 +7892,26 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { ; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v2f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v2f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v2f64_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v2f64_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v2f64_nnan: ; GFX10: ; %bb.0: @@ -7614,15 +8029,15 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) { ; GFX950-SDAG-LABEL: v_maximumnum_v3f64: ; GFX950-SDAG: ; %bb.0: ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7] ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-GISEL-LABEL: v_maximumnum_v3f64: @@ -7755,13 +8170,29 @@ define <3 x double> 
@v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { ; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v3f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v3f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v3f64_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v3f64_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v3f64_nnan: ; GFX10: ; %bb.0: @@ -7900,18 +8331,18 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) { ; GFX950-SDAG-LABEL: v_maximumnum_v4f64: ; GFX950-SDAG: ; %bb.0: ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], 
v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9] ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-GISEL-LABEL: v_maximumnum_v4f64: @@ -8067,14 +8498,32 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { ; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v4f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] -; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] -; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v4f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v4f64_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_maximumnum_v4f64_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11] +; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13] +; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v4f64_nnan: ; GFX10: ; %bb.0: @@ -8136,11 +8585,17 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 { ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f16_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f16_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f16_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f16_no_ieee: ; GFX10: ; %bb.0: @@ -8241,11 +8696,17 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 { ; GFX8-NEXT: v_max_f16_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f16_nan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f16_nan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f16_nan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f16_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f16_nan_no_ieee: ; GFX10: ; %bb.0: @@ -8301,11 +8762,17 @@ define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_no_ieee: ; GFX10: ; %bb.0: @@ -8359,11 +8826,17 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 { ; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f32_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f32_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f32_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f32_nnan_no_ieee: ; GFX10: ; %bb.0: @@ 
-8403,11 +8876,17 @@ define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 { ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f64_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f64_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f64_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f64_no_ieee: ; GFX10: ; %bb.0: @@ -8463,11 +8942,17 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 { ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_f64_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_f64_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_f64_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_f64_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8539,11 +9024,17 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v2f16_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v2f16_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v2f16_no_ieee: ; GFX10: ; %bb.0: @@ -8631,11 +9122,17 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y) ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v2f16_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_maximumnum_v2f16_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_max_f16 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v2f16_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8718,19 +9215,33 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX950-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v3f16_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8829,12 +9340,19 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_maximumnum_v4f16_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_maximumnum_v4f16_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX900-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: 
v_maximumnum_v4f16_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_max_f16 v0, v0, v2 +; GFX950-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_maximumnum_v4f16_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8865,6 +9383,3 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) } attributes #0 = { "amdgpu-ieee"="false" } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GFX900: {{.*}} -; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll index 3b8efafba06f4..fef0adf3b5b32 100644 --- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll @@ -2435,39 +2435,40 @@ define amdgpu_kernel void @test_mfma_nested_loop_zeroinit(ptr addrspace(1) %arg) ; ; GFX942-LABEL: test_mfma_nested_loop_zeroinit: ; GFX942: ; %bb.0: ; %entry -; GFX942-NEXT: v_accvgpr_write_b32 a0, 0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 +; GFX942-NEXT: v_accvgpr_write_b32 a0, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a3, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a5, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a7, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a9, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a11, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a13, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a15, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a17, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a19, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a21, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a23, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a25, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a27, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a29, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a31, v1 ; GFX942-NEXT: s_mov_b32 s0, 0 -; GFX942-NEXT: v_accvgpr_mov_b32 a1, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a2, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a3, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a4, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a5, a0 
-; GFX942-NEXT: v_accvgpr_mov_b32 a6, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a7, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a8, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a9, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a10, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a11, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a12, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a13, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a14, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a15, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a16, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a17, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a18, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a19, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a20, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a21, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a22, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a23, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a24, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a25, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a26, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a27, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a28, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a29, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a30, a0 -; GFX942-NEXT: v_accvgpr_mov_b32 a31, a0 +; GFX942-NEXT: v_accvgpr_write_b32 a1, v1 +; GFX942-NEXT: v_accvgpr_write_b32 a2, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a4, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a6, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a8, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a10, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a12, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a14, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a16, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a18, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a20, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a22, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a24, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a26, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a28, v0 +; GFX942-NEXT: v_accvgpr_write_b32 a30, v0 ; GFX942-NEXT: v_mov_b32_e32 v0, 2.0 ; GFX942-NEXT: v_mov_b32_e32 v1, 1.0 ; GFX942-NEXT: .LBB9_1: ; %for.cond.preheader diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll 
index 558006d2b6957..9c4a1ca797110 100644 --- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll +++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll @@ -5,11 +5,11 @@ ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-GISEL %s -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s @@ -62,21 +62,37 @@ define half @v_minimumnum_f16(half %x, half %y) { ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: 
v_max_f16_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f16: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f16: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f16: ; GFX10-SDAG: ; %bb.0: @@ -211,11 +227,17 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) { ; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f16_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f16_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f16_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f16_nnan: ; GFX10: ; %bb.0: @@ -283,12 +305,19 @@ define half @v_minimumnum_f16_1.0(half %x) { ; GFX8-NEXT: v_min_f16_e32 v0, 1.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f16_1.0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f16_e32 v0, 1.0, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f16_1.0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-NEXT: v_min_f16_e32 v0, 1.0, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f16_1.0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-NEXT: v_min_f16_e32 v0, 1.0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f16_1.0: ; GFX10: ; %bb.0: @@ -373,21 +402,37 @@ define float @v_minimumnum_f32(float %x, float %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f32: ; GFX10-SDAG: ; %bb.0: @@ -461,11 +506,17 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_nnan: ; GFX10: ; %bb.0: @@ -525,21 +576,37 @@ define double @v_minimumnum_f64(double %x, double %y) { ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f64: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f64: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f64: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f64: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f64: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; 
GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f64: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f64: ; GFX10-SDAG: ; %bb.0: @@ -617,11 +684,17 @@ define double @v_minimumnum_f64_nnan(double %x, double %y) { ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f64_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f64_nnan: ; GFX10: ; %bb.0: @@ -663,12 +736,19 @@ define float @v_minimumnum_f32_1.0(float %x) { ; GFX8-NEXT: v_min_f32_e32 v0, 1.0, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_1.0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, 1.0, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_1.0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_min_f32_e32 v0, 1.0, v0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_1.0: +; GFX950: ; %bb.0: +; GFX950-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_min_f32_e32 v0, 1.0, v0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_1.0: ; GFX10: ; %bb.0: @@ -717,13 +797,21 @@ define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_rhs_not_snan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_rhs_not_snan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_rhs_not_snan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_rhs_not_snan: ; GFX10: ; %bb.0: @@ -774,13 +862,21 @@ define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_lhs_not_snan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_lhs_not_snan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 
s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_lhs_not_snan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_lhs_not_snan: ; GFX10: ; %bb.0: @@ -831,13 +927,21 @@ define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_both_operands_not_snan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_both_operands_not_snan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_both_operands_not_snan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_both_operands_not_snan: ; GFX10: ; %bb.0: @@ -887,12 +991,19 @@ define double @v_minimumnum_f64_1.0(double %x) { ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f64_1.0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f64_1.0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: 
v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f64_1.0: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], 1.0 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f64_1.0: ; GFX10: ; %bb.0: @@ -2015,21 +2126,37 @@ define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2128,21 +2255,37 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; 
GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2242,21 +2385,37 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_fabs: +; 
GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32_fabs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_fabs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0| +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs: ; GFX10-SDAG: ; %bb.0: @@ -2356,21 +2515,37 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 -; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX900-SDAG-NEXT: v_max_f32_e64 v0, -v0, 
-v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f32_fneg: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX950-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f32_fneg: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0 +; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg: ; GFX10-SDAG: ; %bb.0: @@ -2473,21 +2648,37 @@ define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) { ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; 
GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2633,21 +2824,37 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) { ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| -; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f16_e64 
v1, -|v1|, -|v1| -; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1| +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs: ; GFX10-SDAG: ; %bb.0: @@ -2794,21 +3001,37 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) { ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; 
GFX9-GISEL-LABEL: v_minimumnum_f16_fabs: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| -; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16_fabs: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16_fabs: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0| +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1| +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs: ; GFX10-SDAG: ; %bb.0: @@ -2955,21 +3178,37 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) { ; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f16_e64 
v1, -v1, -v1 -; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 -; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 -; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX900-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f16_fneg: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX950-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f16_fneg: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0 +; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1 +; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg: ; GFX10-SDAG: ; %bb.0: @@ -3113,21 +3352,37 @@ define double @v_minimumnum_f64_fneg(double %x, double %y) { ; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX9-SDAG-LABEL: v_minimumnum_f64_fneg: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_f64_fneg: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] -; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] -; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX900-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX900-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_f64_fneg: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX950-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_f64_fneg: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] +; GFX950-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3] +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; 
GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_f64_fneg: ; GFX10-SDAG: ; %bb.0: @@ -3389,11 +3644,17 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v2f16_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v2f16_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v2f16_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v2f16_nnan: ; GFX10: ; %bb.0: @@ -3488,16 +3749,16 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_v3f16: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 -; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 -; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 -; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 -; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_v3f16: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-GISEL-LABEL: 
v_minimumnum_v3f16: ; GFX900-GISEL: ; %bb.0: @@ -3510,6 +3771,17 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) { ; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v2 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; GFX950-SDAG-LABEL: v_minimumnum_v3f16: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3 +; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0 +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX950-GISEL-LABEL: v_minimumnum_v3f16: ; GFX950-GISEL: ; %bb.0: ; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -3659,19 +3931,33 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_v3f16_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v3f16_nnan: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v3f16_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX950-GISEL-LABEL: v_minimumnum_v3f16_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v3f16_nnan: ; GFX10: ; %bb.0: @@ -3982,12 +4268,19 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) { ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v4f16_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v4f16_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v4f16_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v4f16_nnan: ; GFX10: ; %bb.0: @@ -6516,27 +6809,49 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_v2f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 
v0, v0, v2 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_v2f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 -; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_v2f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v2f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v2f32: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX950-GISEL-LABEL: v_minimumnum_v2f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3 +; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v2 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_v2f32: ; GFX10-SDAG: ; %bb.0: @@ -6622,12 +6937,19 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) { ; GFX8-NEXT: v_min_f32_e32 v1, v1, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v2f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v2 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v2f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v2f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f32_e32 v0, v0, v2 +; GFX950-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v2f32_nnan: ; GFX10: ; %bb.0: @@ -6712,33 +7034,61 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_v3f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: 
v_min_f32_e32 v1, v1, v3 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_v3f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 -; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 -; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_v3f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v3f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v3f32: 
+; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v3f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4 +; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v5, v5 +; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_v3f32: ; GFX10-SDAG: ; %bb.0: @@ -6840,13 +7190,21 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) { ; GFX8-NEXT: v_min_f32_e32 v2, v2, v5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v3f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v5 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v3f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v3f32_nnan: +; GFX950: ; %bb.0: +; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f32_e32 v0, v0, v3 +; GFX950-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX950-NEXT: v_min_f32_e32 v2, v2, v5 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v3f32_nnan: ; GFX10: ; %bb.0: @@ -6946,39 +7304,73 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) { ; GFX8-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_v4f32: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 -; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 -; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-GISEL-LABEL: v_minimumnum_v4f32: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 -; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 -; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 -; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 -; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 -; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 -; GFX9-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_v4f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-SDAG-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v4f32: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX900-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v4f32: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-SDAG-NEXT: 
v_min_f32_e32 v3, v3, v4 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v4f32: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4 +; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5 +; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6 +; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 +; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3 +; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7 +; GFX950-GISEL-NEXT: v_min_f32_e32 v3, v3, v4 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SDAG-LABEL: v_minimumnum_v4f32: ; GFX10-SDAG: ; %bb.0: @@ -7092,14 +7484,23 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) { ; GFX8-NEXT: v_min_f32_e32 v3, v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v4f32_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v4 -; GFX9-NEXT: v_min_f32_e32 v1, v1, v5 -; GFX9-NEXT: v_min_f32_e32 v2, v2, v6 -; GFX9-NEXT: v_min_f32_e32 v3, v3, v7 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v4f32_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX900-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX900-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX900-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v4f32_nnan: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f32_e32 v0, v0, v4 +; GFX950-NEXT: v_min_f32_e32 v1, v1, v5 +; GFX950-NEXT: v_min_f32_e32 v2, v2, v6 +; GFX950-NEXT: v_min_f32_e32 v3, v3, v7 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX10-LABEL: v_minimumnum_v4f32_nnan: ; GFX10: ; %bb.0: @@ -7201,12 +7602,12 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) { ; GFX950-SDAG-LABEL: v_minimumnum_v2f64: ; GFX950-SDAG: ; %bb.0: ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-GISEL-LABEL: v_minimumnum_v2f64: @@ -7316,12 +7717,26 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) { ; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v2f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v2f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v2f64_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v2f64_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], 
v[0:1], v[4:5] +; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v2f64_nnan: ; GFX10: ; %bb.0: @@ -7439,15 +7854,15 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) { ; GFX950-SDAG-LABEL: v_minimumnum_v3f64: ; GFX950-SDAG: ; %bb.0: ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] -; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-GISEL-LABEL: v_minimumnum_v3f64: @@ -7580,13 +7995,29 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) { ; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v3f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v3f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; 
GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v3f64_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v3f64_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7] +; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] +; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11] +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v3f64_nnan: ; GFX10: ; %bb.0: @@ -7725,18 +8156,18 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) { ; GFX950-SDAG-LABEL: v_minimumnum_v4f64: ; GFX950-SDAG: ; %bb.0: ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15] +; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] +; GFX950-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13] +; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5] +; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11] +; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] ; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9] ; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1] +; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] ; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11] -; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3] -; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13] -; GFX950-SDAG-NEXT: v_max_f64 v[4:5], 
v[4:5], v[4:5] -; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9] -; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15] -; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7] -; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9] ; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-GISEL-LABEL: v_minimumnum_v4f64: @@ -7892,14 +8323,32 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) { ; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v4f64_nnan: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] -; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] -; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] -; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v4f64_nnan: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v4f64_nnan: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-GISEL-LABEL: v_minimumnum_v4f64_nnan: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9] +; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11] +; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13] +; GFX950-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15] +; GFX950-GISEL-NEXT: 
s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v4f64_nnan: ; GFX10: ; %bb.0: @@ -7961,11 +8410,17 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 { ; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f16_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f16_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f16_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f16_no_ieee: ; GFX10: ; %bb.0: @@ -8066,11 +8521,17 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 { ; GFX8-NEXT: v_min_f16_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f16_nan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f16_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f16_nan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f16_nan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f16_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f16_nan_no_ieee: ; GFX10: ; %bb.0: @@ -8126,11 +8587,17 @@ define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_no_ieee: ; GFX10: ; %bb.0: @@ -8184,11 +8651,17 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 { ; GFX8-NEXT: v_min_f32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f32_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f32_e32 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f32_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f32_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f32_e32 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f32_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8228,11 +8701,17 @@ define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 { ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f64_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f64_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f64_no_ieee: +; GFX950: ; %bb.0: +; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f64_no_ieee: ; GFX10: ; %bb.0: @@ -8288,11 +8767,17 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 { ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_f64_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_f64_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_f64_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_f64_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8364,11 +8849,17 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 { ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v2f16_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v2f16_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v2f16_no_ieee: ; GFX10: ; %bb.0: @@ -8456,11 +8947,17 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 
x half> %y) ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_min_f16 v0, v0, v1 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v2f16_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_min_f16 v0, v0, v1 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v2f16_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8543,19 +9040,33 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y) ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: -; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; GFX900-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX900-GISEL: ; %bb.0: +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-SDAG-NEXT: v_pk_min_f16 v0, 
v0, v2 +; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: -; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX950-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v3f16_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8654,12 +9165,19 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX900-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX950-LABEL: v_minimumnum_v4f16_nnan_no_ieee: +; GFX950: ; %bb.0: +; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX950-NEXT: v_pk_min_f16 v0, v0, v2 +; GFX950-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX950-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_minimumnum_v4f16_nnan_no_ieee: ; GFX10: ; %bb.0: @@ -8690,6 +9208,3 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y) } attributes #0 = { "amdgpu-ieee"="false" } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX900: {{.*}} -; GFX950: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll index 7e3d5c97391e1..57e1b87a01063 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.ll @@ -3232,29 +3232,28 @@ define amdgpu_kernel void @s_mul_i128(ptr addrspace(1) %out, [8 x i32], i128 %a, ; GFX1250-NEXT: s_mov_b32 s2, s8 ; GFX1250-NEXT: s_and_b64 s[4:5], s[12:13], s[4:5] ; GFX1250-NEXT: s_mov_b32 s6, s13 -; GFX1250-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13] -; GFX1250-NEXT: s_mul_u64 s[12:13], s[4:5], s[2:3] +; GFX1250-NEXT: s_mul_u64 s[22:23], s[4:5], s[2:3] +; GFX1250-NEXT: s_mul_u64 s[24:25], s[6:7], s[2:3] +; GFX1250-NEXT: s_mov_b32 s2, s23 ; GFX1250-NEXT: s_mov_b32 s16, s9 ; GFX1250-NEXT: s_mul_u64 s[8:9], s[8:9], s[14:15] -; GFX1250-NEXT: s_mul_u64 s[14:15], s[6:7], s[2:3] -; GFX1250-NEXT: s_mov_b32 s2, s13 +; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[24:25], s[2:3] ; GFX1250-NEXT: s_mul_u64 s[4:5], s[4:5], s[16:17] -; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[14:15], s[2:3] -; GFX1250-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17] ; GFX1250-NEXT: s_mov_b32 s2, s15 ; GFX1250-NEXT: s_mov_b32 s15, s3 -; GFX1250-NEXT: s_mov_b32 s13, s3 +; GFX1250-NEXT: s_mul_u64 s[10:11], s[10:11], s[12:13] ; GFX1250-NEXT: s_add_nc_u64 s[4:5], s[4:5], s[14:15] -; GFX1250-NEXT: s_add_nc_u64 s[8:9], s[10:11], s[8:9] +; GFX1250-NEXT: s_mul_u64 s[6:7], s[6:7], s[16:17] ; GFX1250-NEXT: s_mov_b32 s18, s5 -; GFX1250-NEXT: s_mov_b32 s21, s4 +; GFX1250-NEXT: s_mov_b32 s23, s3 ; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[18:19] -; GFX1250-NEXT: s_or_b64 s[4:5], s[12:13], s[20:21] +; GFX1250-NEXT: s_mov_b32 s21, s4 ; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[6:7], s[2:3] -; GFX1250-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 -; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[8:9] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX1250-NEXT: 
s_add_nc_u64 s[4:5], s[10:11], s[8:9] +; GFX1250-NEXT: s_or_b64 s[6:7], s[22:23], s[20:21] +; GFX1250-NEXT: s_add_nc_u64 s[2:3], s[2:3], s[4:5] +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX1250-NEXT: s_mov_b32 s3, 0x31016000 ; GFX1250-NEXT: s_mov_b32 s2, -1 ; GFX1250-NEXT: buffer_store_b128 v[0:3], off, s[0:3], null diff --git a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll index cf244f0b1f884..cba06c17b51ef 100644 --- a/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll +++ b/llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll @@ -7,34 +7,32 @@ define amdgpu_kernel void @matmul_kernel(i32 %a0, i32 %a1) { ; GFX942: ; %bb.0: ; %entry ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX942-NEXT: v_accvgpr_write_b32 a2, 0 -; GFX942-NEXT: s_mov_b32 s2, 0 +; GFX942-NEXT: s_mov_b32 s4, 0 ; GFX942-NEXT: v_accvgpr_write_b32 a1, 0 -; GFX942-NEXT: s_mov_b32 s3, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_cmp_lg_u32 s0, 0 ; GFX942-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GFX942-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; GFX942-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0 ; GFX942-NEXT: s_branch .LBB0_2 ; GFX942-NEXT: .LBB0_1: ; %bb2 ; GFX942-NEXT: ; in Loop: Header=BB0_2 Depth=1 -; GFX942-NEXT: s_or_b32 s4, s3, 1 -; GFX942-NEXT: s_ashr_i32 s5, s3, 31 -; GFX942-NEXT: s_mov_b32 s3, s2 -; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-NEXT: s_nop 2 ; GFX942-NEXT: v_accvgpr_mov_b32 a0, a2 ; GFX942-NEXT: v_accvgpr_mov_b32 a2, a1 ; GFX942-NEXT: v_accvgpr_mov_b32 a3, a1 -; GFX942-NEXT: s_and_b32 s3, s5, s4 -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mfma_f32_16x16x16_f16 a[2:5], v[2:3], v[2:3], a[0:3] +; GFX942-NEXT: s_or_b32 s2, s4, 1 +; GFX942-NEXT: s_ashr_i32 s3, s4, 31 +; GFX942-NEXT: v_mfma_f32_16x16x16_f16 a[2:5], v[0:1], v[0:1], a[0:3] +; GFX942-NEXT: s_and_b32 s4, s3, s2 ; GFX942-NEXT: s_cbranch_execz .LBB0_4 ; GFX942-NEXT: .LBB0_2: ; 
%bb ; GFX942-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX942-NEXT: s_and_b64 vcc, exec, s[0:1] ; GFX942-NEXT: s_cbranch_vccz .LBB0_1 ; GFX942-NEXT: ; %bb.3: -; GFX942-NEXT: ; implicit-def: $sgpr3 +; GFX942-NEXT: ; implicit-def: $sgpr4 ; GFX942-NEXT: ; implicit-def: $agpr2 ; GFX942-NEXT: .LBB0_4: ; %common.ret ; GFX942-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index 9f27e1ffd9130..788fe0474738e 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -715,18 +715,43 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) { ; GFX900-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-NEXT: s_endpgm ; -; PACKED-LABEL: fadd_v2_v_lit_hi0: -; PACKED: ; %bb.0: -; PACKED-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; PACKED-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; PACKED-NEXT: v_lshlrev_b32_e32 v2, 3, v0 -; PACKED-NEXT: s_mov_b64 s[2:3], 0x3f800000 -; PACKED-NEXT: s_waitcnt lgkmcnt(0) -; PACKED-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] -; PACKED-NEXT: s_waitcnt vmcnt(0) -; PACKED-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] -; PACKED-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] -; PACKED-NEXT: s_endpgm +; GFX90A-SDAG-LABEL: fadd_v2_v_lit_hi0: +; GFX90A-SDAG: ; %bb.0: +; GFX90A-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX90A-SDAG-NEXT: s_mov_b64 s[2:3], 0x3f800000 +; GFX90A-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] +; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX90A-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] +; GFX90A-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX90A-SDAG-NEXT: s_endpgm +; +; PACKED-GISEL-LABEL: fadd_v2_v_lit_hi0: +; PACKED-GISEL: ; %bb.0: +; PACKED-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; PACKED-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; 
PACKED-GISEL-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; PACKED-GISEL-NEXT: s_mov_b64 s[2:3], 0x3f800000 +; PACKED-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; PACKED-GISEL-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] +; PACKED-GISEL-NEXT: s_waitcnt vmcnt(0) +; PACKED-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[2:3] +; PACKED-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; PACKED-GISEL-NEXT: s_endpgm +; +; GFX942-SDAG-LABEL: fadd_v2_v_lit_hi0: +; GFX942-SDAG: ; %bb.0: +; GFX942-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v2, 3, v0 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] +; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX942-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], 1.0 +; GFX942-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX942-SDAG-NEXT: s_endpgm ; ; GFX1250-SDAG-LABEL: fadd_v2_v_lit_hi0: ; GFX1250-SDAG: ; %bb.0: @@ -3780,6 +3805,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>) -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GFX90A-SDAG: {{.*}} -; GFX942-SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll index 4d367ef7ffd9d..a0b49f47dac8c 100644 --- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll @@ -1215,10 +1215,8 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out, ; GFX942-NEXT: ; %bb.2: ; GFX942-NEXT: .LBB25_0: ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, s6 -; GFX942-NEXT: v_mov_b32_e32 v1, s7 -; GFX942-NEXT: v_mov_b32_e32 v2, s8 -; GFX942-NEXT: v_mov_b32_e32 v3, s9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[8:9] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] ; GFX942-NEXT: s_endpgm ; @@ -1240,9 +1238,9 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out, ; ; GFX1250-LABEL: fp128_kernel_preload_arg: ; GFX1250: ; %bb.0: -; GFX1250-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v0, s6 -; GFX1250-NEXT: v_dual_mov_b32 v1, s7 :: v_dual_mov_b32 v2, s8 -; GFX1250-NEXT: v_mov_b32_e32 v3, s9 +; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7] +; GFX1250-NEXT: v_mov_b64_e32 v[2:3], s[8:9] +; GFX1250-NEXT: v_mov_b32_e32 v4, 0 ; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[2:3] ; GFX1250-NEXT: s_endpgm store fp128 %in, ptr addrspace(1) %out diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll index b9e9893ede4e2..8c7fe50d4dade 100644 --- a/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll @@ -367,9 +367,7 @@ bb: define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 { ; CHECK-LABEL: illegal_mfma_after_rewrite: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_mov_b32 s0, 0 -; CHECK-NEXT: s_mov_b32 s1, s0 -; CHECK-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; CHECK-NEXT: v_mov_b64_e32 v[8:9], 
0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:3] ; CHECK-NEXT: ;;#ASMEND @@ -402,49 +400,48 @@ define amdgpu_kernel void @illegal_mfma_after_rewrite() #1 { ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: s_nop 0 ; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[12:13], v[4:7] +; CHECK-NEXT: s_nop 2 +; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v14 +; CHECK-NEXT: global_store_short v[8:9], v14, off ; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[26:29], v[8:9], v[8:9], v[4:7] +; CHECK-NEXT: buffer_wbl2 sc0 sc1 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_inv sc0 sc1 ; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[8:9], v[8:9], v[0:3] -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[22:25] ; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[4:7], v[8:9], v[8:9], v[26:29] -; CHECK-NEXT: s_nop 5 -; CHECK-NEXT: v_cvt_f16_f32_e32 v23, v14 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[14:17], v[8:9], v[8:9], v[18:21] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[22:25], v[8:9], v[8:9], v[22:25] ; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[12:13], v[8:9], v[0:3] -; CHECK-NEXT: s_nop 1 -; CHECK-NEXT: v_accvgpr_read_b32 v19, a3 -; CHECK-NEXT: v_accvgpr_read_b32 v18, a2 -; CHECK-NEXT: v_mov_b64_e32 v[20:21], 0 -; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: v_accvgpr_read_b32 v17, a1 -; CHECK-NEXT: v_accvgpr_read_b32 v16, a0 -; CHECK-NEXT: v_cvt_f16_f32_e32 v15, v22 -; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v14 -; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[8:9], v[8:9], v[16:19] +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[16:19], v[8:9], v[8:9], v[18:21] +; CHECK-NEXT: s_nop 4 +; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v22 +; CHECK-NEXT: global_store_short v[8:9], v14, off +; CHECK-NEXT: v_accvgpr_read_b32 v21, a3 +; CHECK-NEXT: v_accvgpr_read_b32 v20, a2 +; CHECK-NEXT: v_accvgpr_read_b32 v19, a1 +; CHECK-NEXT: v_accvgpr_read_b32 v18, a0 ; CHECK-NEXT: v_cvt_f16_f32_e32 v12, v0 -; CHECK-NEXT: global_store_short v[20:21], v23, off -; CHECK-NEXT: buffer_wbl2 sc0 sc1 -; CHECK-NEXT: s_waitcnt vmcnt(0) 
-; CHECK-NEXT: buffer_inv sc0 sc1 ; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[0:3], v[10:11], v[8:9], v[4:7] -; CHECK-NEXT: global_store_short v[20:21], v15, off +; CHECK-NEXT: v_cvt_f16_f32_e32 v15, v16 ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v14, off -; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v16 +; CHECK-NEXT: global_store_short v[8:9], v15, off +; CHECK-NEXT: v_mfma_f32_16x16x16_f16 v[18:21], v[8:9], v[8:9], v[18:21] ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v14, off ; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0 +; CHECK-NEXT: s_nop 2 +; CHECK-NEXT: v_cvt_f16_f32_e32 v14, v18 +; CHECK-NEXT: global_store_short v[8:9], v14, off ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v12, off +; CHECK-NEXT: global_store_short v[8:9], v12, off ; CHECK-NEXT: buffer_wbl2 sc0 sc1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_inv sc0 sc1 -; CHECK-NEXT: global_store_short v[20:21], v0, off +; CHECK-NEXT: global_store_short v[8:9], v0, off ; CHECK-NEXT: s_endpgm entry: %k0 = call <4 x float> asm sideeffect "; def $0", "=s"() diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll index 192bd2073886a..5a7f7fb00f04e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-to-vreg1-copy.ll @@ -18,12 +18,11 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) { ; GCN-NEXT: s_addc_u32 s0, 1, 0 ; GCN-NEXT: v_readfirstlane_b32 s2, v1 ; GCN-NEXT: s_cmp_ge_u32 s3, s4 -; GCN-NEXT: s_cselect_b32 s4, s0, s2 ; GCN-NEXT: v_mov_b32_e32 v1, 0 -; GCN-NEXT: s_cmp_lg_u64 0, 0 +; GCN-NEXT: s_cselect_b32 s4, s0, s2 ; GCN-NEXT: s_mov_b64 s[0:1], 0 ; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; GCN-NEXT: s_cselect_b64 
s[2:3], -1, 0 +; GCN-NEXT: s_mov_b64 s[2:3], 0 ; GCN-NEXT: s_branch .LBB0_3 ; GCN-NEXT: .LBB0_1: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1 @@ -33,9 +32,9 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) { ; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: s_and_b64 s[4:5], exec, s[8:9] -; GCN-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] +; GCN-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3] ; GCN-NEXT: s_mov_b32 s4, 0 -; GCN-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GCN-NEXT: s_andn2_b64 exec, exec, s[2:3] ; GCN-NEXT: s_cbranch_execz .LBB0_8 ; GCN-NEXT: .LBB0_3: ; %.lr.ph27 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -57,7 +56,7 @@ define amdgpu_kernel void @copy_to_vreg_1(i32 %0) { ; GCN-NEXT: s_cbranch_execz .LBB0_2 ; GCN-NEXT: ; %bb.6: ; %pred.store.continue ; GCN-NEXT: ; in Loop: Header=BB0_3 Depth=1 -; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[2:3] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[0:1] ; GCN-NEXT: s_xor_b64 s[6:7], exec, s[6:7] ; GCN-NEXT: s_cbranch_execz .LBB0_1 ; GCN-NEXT: ; %bb.7: ; %pred.store.if41 diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll index 51dc9a51ec9d0..3fd7f1cb481a4 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll @@ -88,8 +88,7 @@ define void @v_shuffle_v2i64_v2i64__1_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -144,8 +143,7 @@ define void @v_shuffle_v2i64_v2i64__3_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 
-; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -204,10 +202,8 @@ define void @v_shuffle_v2i64_v2i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -262,8 +258,7 @@ define void @v_shuffle_v2i64_v2i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -309,8 +304,7 @@ define void @v_shuffle_v2i64_v2i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -355,8 +349,7 @@ define void @v_shuffle_v2i64_v2i64__3_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -401,8 +394,7 
@@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -446,8 +438,7 @@ define void @v_shuffle_v2i64_v2i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -492,8 +483,7 @@ define void @v_shuffle_v2i64_v2i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -537,8 +527,7 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -660,8 +649,7 @@ define void @v_shuffle_v2i64_v2i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 
v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -794,8 +782,7 @@ define void @v_shuffle_v2i64_v2i64__1_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -900,8 +887,7 @@ define void @v_shuffle_v2i64_v2i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -956,8 +942,7 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1095,8 +1080,7 @@ define void @s_shuffle_v2i64_v2i64__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1154,8 +1138,7 @@ define void @s_shuffle_v2i64_v2i64__3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: 
;;#ASMEND @@ -1213,10 +1196,8 @@ define void @s_shuffle_v2i64_v2i64__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1270,8 +1251,7 @@ define void @s_shuffle_v2i64_v2i64__3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1320,10 +1300,8 @@ define void @s_shuffle_v2i64_v2i64__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1336,18 +1314,43 @@ define void @s_shuffle_v2i64_v2i64__3_2() { } define void @s_shuffle_v2i64_v2i64__3_3() { -; GFX9-LABEL: s_shuffle_v2i64_v2i64__3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v2i64__3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; 
GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v2i64__3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> @@ -1388,8 +1391,7 @@ define void @s_shuffle_v2i64_v2i64__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1401,18 +1403,43 @@ define void @s_shuffle_v2i64_v2i64__u_0() { } define void @s_shuffle_v2i64_v2i64__0_0() { -; GFX9-LABEL: s_shuffle_v2i64_v2i64__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v2i64__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v2i64__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1456,10 +1483,8 @@ define void @s_shuffle_v2i64_v2i64__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1503,8 +1528,7 @@ define void @s_shuffle_v2i64_v2i64__2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; 
GFX942-NEXT: ;;#ASMEND @@ -1596,18 +1620,43 @@ define void @s_shuffle_v2i64_v2i64__0_1() { } define void @s_shuffle_v2i64_v2i64__1_1() { -; GFX9-LABEL: s_shuffle_v2i64_v2i64__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v2i64__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v2i64__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -1741,8 +1790,7 @@ define void @s_shuffle_v2i64_v2i64__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1850,8 +1898,7 @@ define void @s_shuffle_v2i64_v2i64__0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1905,8 +1952,7 @@ define void @s_shuffle_v2i64_v2i64__1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll index bc8a56a30d8f9..f54d45b1367cc 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll @@ -127,8 +127,7 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -223,8 +222,7 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -283,10 
+281,8 @@ define void @v_shuffle_v2i64_v3i64__5_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -341,8 +337,7 @@ define void @v_shuffle_v2i64_v3i64__5_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -397,8 +392,7 @@ define void @v_shuffle_v2i64_v3i64__5_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v10 -; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -447,10 +441,8 @@ define void @v_shuffle_v2i64_v3i64__5_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -495,8 +487,7 @@ define void @v_shuffle_v2i64_v3i64__5_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; 
def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -541,8 +532,7 @@ define void @v_shuffle_v2i64_v3i64__5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -587,8 +577,7 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -632,8 +621,7 @@ define void @v_shuffle_v2i64_v3i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -677,8 +665,7 @@ define void @v_shuffle_v2i64_v3i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] @@ -726,10 +713,8 @@ define void @v_shuffle_v2i64_v3i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -773,8 +758,7 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -828,8 +812,7 @@ define void @v_shuffle_v2i64_v3i64__4_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -952,8 +935,7 @@ define void @v_shuffle_v2i64_v3i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -997,8 +979,7 @@ define void @v_shuffle_v2i64_v3i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; 
GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1091,8 +1072,7 @@ define void @v_shuffle_v2i64_v3i64__4_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1176,8 +1156,7 @@ define void @v_shuffle_v2i64_v3i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1260,8 +1239,7 @@ define void @v_shuffle_v2i64_v3i64__2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1354,8 +1332,7 @@ define void @v_shuffle_v2i64_v3i64__4_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1489,8 +1466,7 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr 
addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1545,8 +1521,7 @@ define void @v_shuffle_v2i64_v3i64__4_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1641,8 +1616,7 @@ define void @v_shuffle_v2i64_v3i64__0_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1697,8 +1671,7 @@ define void @v_shuffle_v2i64_v3i64__1_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1753,8 +1726,7 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; 
GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1839,8 +1811,7 @@ define void @v_shuffle_v2i64_v3i64__4_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1935,8 +1906,7 @@ define void @v_shuffle_v2i64_v3i64__0_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1991,8 +1961,7 @@ define void @v_shuffle_v2i64_v3i64__1_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2047,8 +2016,7 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v4 -; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2093,8 +2061,7 @@ define void @v_shuffle_v2i64_v3i64__3_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 
v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2232,8 +2199,7 @@ define void @s_shuffle_v2i64_v3i64__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2273,8 +2239,7 @@ define void @s_shuffle_v2i64_v3i64__2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2332,8 +2297,7 @@ define void @s_shuffle_v2i64_v3i64__4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2374,8 +2338,7 @@ define void @s_shuffle_v2i64_v3i64__5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2430,11 +2393,11 @@ define void @s_shuffle_v2i64_v3i64__5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2484,8 +2447,7 @@ define void @s_shuffle_v2i64_v3i64__5_1() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2539,10 +2501,8 @@ define void @s_shuffle_v2i64_v3i64__5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2587,10 +2547,8 @@ define void @s_shuffle_v2i64_v3i64__5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2603,18 +2561,43 @@ define void @s_shuffle_v2i64_v3i64__5_3() { } define void @s_shuffle_v2i64_v3i64__5_4() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__5_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__5_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__5_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -2659,10 +2642,8 @@ define void @s_shuffle_v2i64_v3i64__5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2707,8 +2688,7 @@ define void @s_shuffle_v2i64_v3i64__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2720,18 +2700,43 @@ define void @s_shuffle_v2i64_v3i64__u_0() { } define void @s_shuffle_v2i64_v3i64__0_0() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART 
-; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -2775,10 +2780,8 @@ define void @s_shuffle_v2i64_v3i64__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2822,10 +2825,8 @@ define void @s_shuffle_v2i64_v3i64__2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2869,8 +2870,7 @@ define void @s_shuffle_v2i64_v3i64__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2924,13 +2924,11 @@ define void @s_shuffle_v2i64_v3i64__4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3023,18 +3021,43 @@ define void @s_shuffle_v2i64_v3i64__0_1() { } define void @s_shuffle_v2i64_v3i64__1_1() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def 
s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3042,18 +3065,43 @@ define void @s_shuffle_v2i64_v3i64__1_1() { } define void @s_shuffle_v2i64_v3i64__2_1() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__2_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__2_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3142,8 +3190,7 @@ define void @s_shuffle_v2i64_v3i64__4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3188,8 +3235,7 @@ define void @s_shuffle_v2i64_v3i64__u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3201,18 +3247,43 @@ define void @s_shuffle_v2i64_v3i64__u_2() { } define void @s_shuffle_v2i64_v3i64__0_2() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__0_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__0_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -3256,10 +3327,8 @@ define void @s_shuffle_v2i64_v3i64__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3307,10 +3376,8 @@ define 
void @s_shuffle_v2i64_v3i64__2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3354,8 +3421,7 @@ define void @s_shuffle_v2i64_v3i64__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3412,10 +3478,8 @@ define void @s_shuffle_v2i64_v3i64__4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3514,8 +3578,7 @@ define void @s_shuffle_v2i64_v3i64__1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3555,8 +3618,7 @@ define void @s_shuffle_v2i64_v3i64__2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3618,10 +3680,8 @@ define void @s_shuffle_v2i64_v3i64__4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3716,8 +3776,7 @@ define void @s_shuffle_v2i64_v3i64__0_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3771,8 +3830,7 @@ define void @s_shuffle_v2i64_v3i64__1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3826,8 +3884,7 @@ define void @s_shuffle_v2i64_v3i64__2_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3881,18 +3938,43 @@ define void @s_shuffle_v2i64_v3i64__3_4() { } define void @s_shuffle_v2i64_v3i64__4_4() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -3933,8 +4015,7 @@ define void @s_shuffle_v2i64_v3i64__u_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3988,8 +4069,7 @@ define void @s_shuffle_v2i64_v3i64__0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4047,10 +4127,8 @@ define void @s_shuffle_v2i64_v3i64__1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; 
GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4104,10 +4182,8 @@ define void @s_shuffle_v2i64_v3i64__2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4120,18 +4196,43 @@ define void @s_shuffle_v2i64_v3i64__2_5() { } define void @s_shuffle_v2i64_v3i64__3_5() { -; GFX9-LABEL: s_shuffle_v2i64_v3i64__3_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v3i64__3_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v3i64__3_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> @@ -4176,10 +4277,8 @@ define void @s_shuffle_v2i64_v3i64__4_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll index dd42a1dd44320..1c738b8f4f1d9 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll @@ -166,8 +166,7 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -302,8 +301,7 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: 
global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -362,10 +360,8 @@ define void @v_shuffle_v2i64_v4i64__7_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -420,8 +416,7 @@ define void @v_shuffle_v2i64_v4i64__7_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v10 -; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -476,8 +471,7 @@ define void @v_shuffle_v2i64_v4i64__7_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v12 -; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -532,8 +526,7 @@ define void @v_shuffle_v2i64_v4i64__7_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -582,10 +575,8 @@ define void @v_shuffle_v2i64_v4i64__7_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: 
; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -630,8 +621,7 @@ define void @v_shuffle_v2i64_v4i64__7_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -676,8 +666,7 @@ define void @v_shuffle_v2i64_v4i64__7_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -722,8 +711,7 @@ define void @v_shuffle_v2i64_v4i64__7_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -768,8 +756,7 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 
v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -813,8 +800,7 @@ define void @v_shuffle_v2i64_v4i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -858,8 +844,7 @@ define void @v_shuffle_v2i64_v4i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -903,8 +888,7 @@ define void @v_shuffle_v2i64_v4i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -952,10 +936,8 @@ define void @v_shuffle_v2i64_v4i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -999,8 +981,7 @@ define void 
@v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1054,8 +1035,7 @@ define void @v_shuffle_v2i64_v4i64__5_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1110,8 +1090,7 @@ define void @v_shuffle_v2i64_v4i64__6_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1234,8 +1213,7 @@ define void @v_shuffle_v2i64_v4i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1279,8 +1257,7 @@ define void @v_shuffle_v2i64_v4i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v2 -; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] 
; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1324,8 +1301,7 @@ define void @v_shuffle_v2i64_v4i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1418,8 +1394,7 @@ define void @v_shuffle_v2i64_v4i64__5_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1474,8 +1449,7 @@ define void @v_shuffle_v2i64_v4i64__6_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v2 -; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1559,8 +1533,7 @@ define void @v_shuffle_v2i64_v4i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1643,8 +1616,7 @@ define void @v_shuffle_v2i64_v4i64__2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: 
v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1688,8 +1660,7 @@ define void @v_shuffle_v2i64_v4i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1782,8 +1753,7 @@ define void @v_shuffle_v2i64_v4i64__5_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1838,8 +1808,7 @@ define void @v_shuffle_v2i64_v4i64__6_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v4 -; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1923,8 +1892,7 @@ define void @v_shuffle_v2i64_v4i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1968,8 +1936,7 @@ define void @v_shuffle_v2i64_v4i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def 
v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2052,8 +2019,7 @@ define void @v_shuffle_v2i64_v4i64__3_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2146,8 +2112,7 @@ define void @v_shuffle_v2i64_v4i64__5_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2202,8 +2167,7 @@ define void @v_shuffle_v2i64_v4i64__6_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v6 -; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2376,8 +2340,7 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] @@ -2432,8 +2395,7 @@ define void @v_shuffle_v2i64_v4i64__5_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2478,8 +2440,7 @@ define void @v_shuffle_v2i64_v4i64__6_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2574,8 +2535,7 @@ define void @v_shuffle_v2i64_v4i64__0_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2630,8 +2590,7 @@ define void @v_shuffle_v2i64_v4i64__1_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2686,8 +2645,7 @@ define void @v_shuffle_v2i64_v4i64__2_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v8 -; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9] ; GFX942-NEXT: 
global_store_dwordx4 v14, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2742,8 +2700,7 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v6 -; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2828,8 +2785,7 @@ define void @v_shuffle_v2i64_v4i64__5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2874,8 +2830,7 @@ define void @v_shuffle_v2i64_v4i64__6_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v2 -; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2970,8 +2925,7 @@ define void @v_shuffle_v2i64_v4i64__0_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3026,8 +2980,7 @@ define void @v_shuffle_v2i64_v4i64__1_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; 
GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3082,8 +3035,7 @@ define void @v_shuffle_v2i64_v4i64__2_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v10 -; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3138,8 +3090,7 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v6 -; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3184,8 +3135,7 @@ define void @v_shuffle_v2i64_v4i64__4_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3270,8 +3220,7 @@ define void @v_shuffle_v2i64_v4i64__6_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3366,8 +3315,7 @@ define void @v_shuffle_v2i64_v4i64__0_7(ptr addrspace(1) inreg %ptr) { ; 
GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3422,8 +3370,7 @@ define void @v_shuffle_v2i64_v4i64__1_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v10 -; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3478,8 +3425,7 @@ define void @v_shuffle_v2i64_v4i64__2_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v12 -; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3534,8 +3480,7 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3580,8 +3525,7 @@ define void @v_shuffle_v2i64_v4i64__4_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] @@ -3626,8 +3570,7 @@ define void @v_shuffle_v2i64_v4i64__5_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3765,8 +3708,7 @@ define void @s_shuffle_v2i64_v4i64__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3850,8 +3792,7 @@ define void @s_shuffle_v2i64_v4i64__3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3909,8 +3850,7 @@ define void @s_shuffle_v2i64_v4i64__5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3996,8 +3936,7 @@ define void @s_shuffle_v2i64_v4i64__7_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4056,10 +3995,8 @@ define void @s_shuffle_v2i64_v4i64__7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; 
GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4113,8 +4050,7 @@ define void @s_shuffle_v2i64_v4i64__7_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4172,10 +4108,8 @@ define void @s_shuffle_v2i64_v4i64__7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4230,8 +4164,7 @@ define void @s_shuffle_v2i64_v4i64__7_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4280,10 +4213,8 @@ define void @s_shuffle_v2i64_v4i64__7_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4296,18 +4227,43 @@ define void @s_shuffle_v2i64_v4i64__7_4() { } define void @s_shuffle_v2i64_v4i64__7_5() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__7_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__7_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__7_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4352,10 +4308,8 @@ define void @s_shuffle_v2i64_v4i64__7_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4368,18 +4322,43 @@ define void @s_shuffle_v2i64_v4i64__7_6() { } define void @s_shuffle_v2i64_v4i64__7_7() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -4420,8 +4399,7 @@ define void @s_shuffle_v2i64_v4i64__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4433,18 +4411,43 @@ define void @s_shuffle_v2i64_v4i64__u_0() { } define void @s_shuffle_v2i64_v4i64__0_0() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> zeroinitializer call void asm 
sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4488,10 +4491,8 @@ define void @s_shuffle_v2i64_v4i64__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4503,18 +4504,43 @@ define void @s_shuffle_v2i64_v4i64__1_0() { } define void @s_shuffle_v2i64_v4i64__2_0() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: s_mov_b32 s11, s5 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4558,10 +4584,8 @@ define void @s_shuffle_v2i64_v4i64__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4605,8 +4629,7 @@ define void @s_shuffle_v2i64_v4i64__4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4664,10 +4687,8 @@ define void @s_shuffle_v2i64_v4i64__5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4722,8 +4743,7 @@ define void @s_shuffle_v2i64_v4i64__6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4816,18 +4836,43 @@ define void @s_shuffle_v2i64_v4i64__0_1() { } define void 
@s_shuffle_v2i64_v4i64__1_1() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4835,18 +4880,43 @@ define void @s_shuffle_v2i64_v4i64__1_1() { } define void @s_shuffle_v2i64_v4i64__2_1() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART 
-; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s6 -; GFX9-NEXT: s_mov_b32 s11, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4854,18 +4924,43 @@ define void @s_shuffle_v2i64_v4i64__2_1() { } define void @s_shuffle_v2i64_v4i64__3_1() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__3_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__3_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -4954,8 +5049,7 @@ define void @s_shuffle_v2i64_v4i64__5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5010,8 +5104,7 @@ define void @s_shuffle_v2i64_v4i64__6_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5056,8 +5149,7 @@ define void @s_shuffle_v2i64_v4i64__u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5069,18 +5161,43 @@ define void @s_shuffle_v2i64_v4i64__u_2() { } define void @s_shuffle_v2i64_v4i64__0_2() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; 
use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5124,10 +5241,8 @@ define void @s_shuffle_v2i64_v4i64__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5139,18 +5254,43 @@ define void @s_shuffle_v2i64_v4i64__1_2() { } define void @s_shuffle_v2i64_v4i64__2_2() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: 
s_shuffle_v2i64_v4i64__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5194,10 +5334,8 @@ define void @s_shuffle_v2i64_v4i64__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5241,8 +5379,7 @@ define void @s_shuffle_v2i64_v4i64__4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5299,10 +5436,8 @@ define void @s_shuffle_v2i64_v4i64__5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5357,8 +5492,7 @@ define void @s_shuffle_v2i64_v4i64__6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5411,18 +5545,43 @@ define void @s_shuffle_v2i64_v4i64__u_3() { } define void @s_shuffle_v2i64_v4i64__0_3() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5430,18 +5589,43 @@ define void 
@s_shuffle_v2i64_v4i64__0_3() { } define void @s_shuffle_v2i64_v4i64__1_3() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__1_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__1_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__1_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5489,18 +5673,43 @@ define void @s_shuffle_v2i64_v4i64__2_3() { } define void @s_shuffle_v2i64_v4i64__3_3() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -5590,8 +5799,7 @@ define void @s_shuffle_v2i64_v4i64__5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5646,8 +5854,7 @@ define void 
@s_shuffle_v2i64_v4i64__6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5746,8 +5953,7 @@ define void @s_shuffle_v2i64_v4i64__1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5831,8 +6037,7 @@ define void @s_shuffle_v2i64_v4i64__3_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5894,10 +6099,8 @@ define void @s_shuffle_v2i64_v4i64__5_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5910,18 +6113,43 @@ define void @s_shuffle_v2i64_v4i64__5_4() { } define void @s_shuffle_v2i64_v4i64__6_4() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: s_mov_b32 s11, s5 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6012,8 +6240,7 @@ define void @s_shuffle_v2i64_v4i64__0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6067,8 +6294,7 @@ define void @s_shuffle_v2i64_v4i64__1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6123,8 +6349,7 @@ define void @s_shuffle_v2i64_v4i64__2_5() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6178,8 +6403,7 @@ define void @s_shuffle_v2i64_v4i64__3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6233,18 +6457,43 @@ define void @s_shuffle_v2i64_v4i64__4_5() { } define void @s_shuffle_v2i64_v4i64__5_5() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6253,18 +6502,43 @@ define void @s_shuffle_v2i64_v4i64__5_5() { } define void @s_shuffle_v2i64_v4i64__6_5() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s6 -; GFX9-NEXT: s_mov_b32 s11, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND 
+; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6305,8 +6579,7 @@ define void @s_shuffle_v2i64_v4i64__u_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6360,8 +6633,7 @@ define void @s_shuffle_v2i64_v4i64__0_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6419,10 +6691,8 @@ define void @s_shuffle_v2i64_v4i64__1_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6477,8 +6747,7 @@ define void @s_shuffle_v2i64_v4i64__2_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6536,10 +6805,8 @@ define void @s_shuffle_v2i64_v4i64__3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6552,18 +6819,43 @@ define void @s_shuffle_v2i64_v4i64__3_6() { } define void @s_shuffle_v2i64_v4i64__4_6() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__4_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__4_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__4_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6608,10 +6900,8 @@ define void @s_shuffle_v2i64_v4i64__5_6() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6624,18 +6914,43 @@ define void @s_shuffle_v2i64_v4i64__5_6() { } define void @s_shuffle_v2i64_v4i64__6_6() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6726,8 +7041,7 @@ define void @s_shuffle_v2i64_v4i64__0_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6782,8 +7096,7 @@ define void @s_shuffle_v2i64_v4i64__1_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6838,8 +7151,7 @@ define void @s_shuffle_v2i64_v4i64__2_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6894,8 +7206,7 @@ define void @s_shuffle_v2i64_v4i64__3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6908,18 +7219,43 @@ define void @s_shuffle_v2i64_v4i64__3_7() { } define void @s_shuffle_v2i64_v4i64__4_7() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__4_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__4_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__4_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> @@ -6928,18 +7264,43 @@ define void @s_shuffle_v2i64_v4i64__4_7() { } define void @s_shuffle_v2i64_v4i64__5_7() { -; GFX9-LABEL: s_shuffle_v2i64_v4i64__5_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v4i64__5_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v4i64__5_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll index 7ee7c83e0122d..c8aac3a841c69 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll @@ -322,8 +322,7 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v14 -; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -618,8 +617,7 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] 
; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v14 -; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -678,10 +676,8 @@ define void @v_shuffle_v2i64_v8i64__15_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v16 -; GFX942-NEXT: v_mov_b32_e32 v3, v17 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -736,8 +732,7 @@ define void @v_shuffle_v2i64_v8i64__15_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v18 -; GFX942-NEXT: v_mov_b32_e32 v1, v19 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -792,8 +787,7 @@ define void @v_shuffle_v2i64_v8i64__15_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v20 -; GFX942-NEXT: v_mov_b32_e32 v3, v21 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[20:21] ; GFX942-NEXT: global_store_dwordx4 v22, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -848,8 +842,7 @@ define void @v_shuffle_v2i64_v8i64__15_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v22 -; GFX942-NEXT: v_mov_b32_e32 v5, v23 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[22:23] ; GFX942-NEXT: 
global_store_dwordx4 v24, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -904,8 +897,7 @@ define void @v_shuffle_v2i64_v8i64__15_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v24 -; GFX942-NEXT: v_mov_b32_e32 v7, v25 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[24:25] ; GFX942-NEXT: global_store_dwordx4 v26, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -960,8 +952,7 @@ define void @v_shuffle_v2i64_v8i64__15_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v26 -; GFX942-NEXT: v_mov_b32_e32 v9, v27 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[26:27] ; GFX942-NEXT: global_store_dwordx4 v28, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1016,8 +1007,7 @@ define void @v_shuffle_v2i64_v8i64__15_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v28 -; GFX942-NEXT: v_mov_b32_e32 v11, v29 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[28:29] ; GFX942-NEXT: global_store_dwordx4 v30, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1072,8 +1062,7 @@ define void @v_shuffle_v2i64_v8i64__15_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[16:31] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v30 -; GFX942-NEXT: v_mov_b32_e32 v13, v31 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[30:31] ; GFX942-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1122,10 +1111,8 @@ define void @v_shuffle_v2i64_v8i64__15_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; 
GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1170,8 +1157,7 @@ define void @v_shuffle_v2i64_v8i64__15_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v14 -; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1216,8 +1202,7 @@ define void @v_shuffle_v2i64_v8i64__15_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1262,8 +1247,7 @@ define void @v_shuffle_v2i64_v8i64__15_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1308,8 +1292,7 @@ define void @v_shuffle_v2i64_v8i64__15_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v14 -; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15] ; GFX942-NEXT: 
global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1354,8 +1337,7 @@ define void @v_shuffle_v2i64_v8i64__15_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v14 -; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1400,8 +1382,7 @@ define void @v_shuffle_v2i64_v8i64__15_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v14 -; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1446,8 +1427,7 @@ define void @v_shuffle_v2i64_v8i64__15_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v14 -; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1492,8 +1472,7 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1537,8 +1516,7 @@ define void @v_shuffle_v2i64_v8i64__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: 
;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1582,8 +1560,7 @@ define void @v_shuffle_v2i64_v8i64__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1627,8 +1604,7 @@ define void @v_shuffle_v2i64_v8i64__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1672,8 +1648,7 @@ define void @v_shuffle_v2i64_v8i64__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1717,8 +1692,7 @@ define void @v_shuffle_v2i64_v8i64__4_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v0 -; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] @@ -1762,8 +1736,7 @@ define void @v_shuffle_v2i64_v8i64__5_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v0 -; GFX942-NEXT: v_mov_b32_e32 v13, v1 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1807,8 +1780,7 @@ define void @v_shuffle_v2i64_v8i64__6_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v0 -; GFX942-NEXT: v_mov_b32_e32 v15, v1 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1856,10 +1828,8 @@ define void @v_shuffle_v2i64_v8i64__7_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1903,8 +1873,7 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1958,8 +1927,7 @@ define void @v_shuffle_v2i64_v8i64__9_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: 
;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2014,8 +1982,7 @@ define void @v_shuffle_v2i64_v8i64__10_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2070,8 +2037,7 @@ define void @v_shuffle_v2i64_v8i64__11_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v0 -; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2126,8 +2092,7 @@ define void @v_shuffle_v2i64_v8i64__12_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v0 -; GFX942-NEXT: v_mov_b32_e32 v13, v1 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2182,8 +2147,7 @@ define void @v_shuffle_v2i64_v8i64__13_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v0 -; GFX942-NEXT: v_mov_b32_e32 v15, v1 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2238,8 +2202,7 @@ define void 
@v_shuffle_v2i64_v8i64__14_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v16, v0 -; GFX942-NEXT: v_mov_b32_e32 v17, v1 +; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2362,8 +2325,7 @@ define void @v_shuffle_v2i64_v8i64__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2407,8 +2369,7 @@ define void @v_shuffle_v2i64_v8i64__2_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v2 -; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2452,8 +2413,7 @@ define void @v_shuffle_v2i64_v8i64__3_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2497,8 +2457,7 @@ define void @v_shuffle_v2i64_v8i64__4_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v2 -; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] ; GFX942-NEXT: 
global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2542,8 +2501,7 @@ define void @v_shuffle_v2i64_v8i64__5_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v2 -; GFX942-NEXT: v_mov_b32_e32 v13, v3 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2587,8 +2545,7 @@ define void @v_shuffle_v2i64_v8i64__6_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v2 -; GFX942-NEXT: v_mov_b32_e32 v15, v3 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2632,8 +2589,7 @@ define void @v_shuffle_v2i64_v8i64__7_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v14 -; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2726,8 +2682,7 @@ define void @v_shuffle_v2i64_v8i64__9_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2782,8 +2737,7 @@ define void @v_shuffle_v2i64_v8i64__10_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 
-; GFX942-NEXT: v_mov_b32_e32 v10, v2 -; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v20, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2838,8 +2792,7 @@ define void @v_shuffle_v2i64_v8i64__11_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v2 -; GFX942-NEXT: v_mov_b32_e32 v13, v3 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v20, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2894,8 +2847,7 @@ define void @v_shuffle_v2i64_v8i64__12_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v2 -; GFX942-NEXT: v_mov_b32_e32 v15, v3 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v20, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2950,8 +2902,7 @@ define void @v_shuffle_v2i64_v8i64__13_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v16, v2 -; GFX942-NEXT: v_mov_b32_e32 v17, v3 +; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v20, v[14:17], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3006,8 +2957,7 @@ define void @v_shuffle_v2i64_v8i64__14_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v18, v2 -; GFX942-NEXT: v_mov_b32_e32 v19, v3 +; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3091,8 +3041,7 @@ define void 
@v_shuffle_v2i64_v8i64__0_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3175,8 +3124,7 @@ define void @v_shuffle_v2i64_v8i64__2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3220,8 +3168,7 @@ define void @v_shuffle_v2i64_v8i64__3_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v4 -; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3265,8 +3212,7 @@ define void @v_shuffle_v2i64_v8i64__4_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3310,8 +3256,7 @@ define void @v_shuffle_v2i64_v8i64__5_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v4 -; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5] ; 
GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3355,8 +3300,7 @@ define void @v_shuffle_v2i64_v8i64__6_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v4 -; GFX942-NEXT: v_mov_b32_e32 v15, v5 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3400,8 +3344,7 @@ define void @v_shuffle_v2i64_v8i64__7_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3494,8 +3437,7 @@ define void @v_shuffle_v2i64_v8i64__9_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v22, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3550,8 +3492,7 @@ define void @v_shuffle_v2i64_v8i64__10_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v4 -; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v22, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3606,8 +3547,7 @@ define void @v_shuffle_v2i64_v8i64__11_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: 
s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v4 -; GFX942-NEXT: v_mov_b32_e32 v15, v5 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v22, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3662,8 +3602,7 @@ define void @v_shuffle_v2i64_v8i64__12_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v16, v4 -; GFX942-NEXT: v_mov_b32_e32 v17, v5 +; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v22, v[14:17], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3718,8 +3657,7 @@ define void @v_shuffle_v2i64_v8i64__13_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v18, v4 -; GFX942-NEXT: v_mov_b32_e32 v19, v5 +; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v22, v[16:19], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3774,8 +3712,7 @@ define void @v_shuffle_v2i64_v8i64__14_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v20, v4 -; GFX942-NEXT: v_mov_b32_e32 v21, v5 +; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v22, v[18:21], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3859,8 +3796,7 @@ define void @v_shuffle_v2i64_v8i64__0_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3904,8 +3840,7 @@ define void 
@v_shuffle_v2i64_v8i64__1_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3988,8 +3923,7 @@ define void @v_shuffle_v2i64_v8i64__3_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v6 -; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4033,8 +3967,7 @@ define void @v_shuffle_v2i64_v8i64__4_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v6 -; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4078,8 +4011,7 @@ define void @v_shuffle_v2i64_v8i64__5_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4123,8 +4055,7 @@ define void @v_shuffle_v2i64_v8i64__6_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v6 -; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7] ; 
GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4168,8 +4099,7 @@ define void @v_shuffle_v2i64_v8i64__7_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4262,8 +4192,7 @@ define void @v_shuffle_v2i64_v8i64__9_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v24, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4318,8 +4247,7 @@ define void @v_shuffle_v2i64_v8i64__10_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v6 -; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4374,8 +4302,7 @@ define void @v_shuffle_v2i64_v8i64__11_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v16, v6 -; GFX942-NEXT: v_mov_b32_e32 v17, v7 +; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v24, v[14:17], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4430,8 +4357,7 @@ define void @v_shuffle_v2i64_v8i64__12_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; 
GFX942-NEXT: v_mov_b32_e32 v18, v6 -; GFX942-NEXT: v_mov_b32_e32 v19, v7 +; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4486,8 +4412,7 @@ define void @v_shuffle_v2i64_v8i64__13_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v20, v6 -; GFX942-NEXT: v_mov_b32_e32 v21, v7 +; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v24, v[18:21], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4542,8 +4467,7 @@ define void @v_shuffle_v2i64_v8i64__14_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v22, v6 -; GFX942-NEXT: v_mov_b32_e32 v23, v7 +; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4627,8 +4551,7 @@ define void @v_shuffle_v2i64_v8i64__0_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4672,8 +4595,7 @@ define void @v_shuffle_v2i64_v8i64__1_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4717,8 +4639,7 @@ define void 
@v_shuffle_v2i64_v8i64__2_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v8 -; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4801,8 +4722,7 @@ define void @v_shuffle_v2i64_v8i64__4_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v8 -; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4846,8 +4766,7 @@ define void @v_shuffle_v2i64_v8i64__5_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v8 -; GFX942-NEXT: v_mov_b32_e32 v13, v9 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4891,8 +4810,7 @@ define void @v_shuffle_v2i64_v8i64__6_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v8 -; GFX942-NEXT: v_mov_b32_e32 v15, v9 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -4936,8 +4854,7 @@ define void @v_shuffle_v2i64_v8i64__7_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v14 -; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], 
v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5030,8 +4947,7 @@ define void @v_shuffle_v2i64_v8i64__9_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v8 -; GFX942-NEXT: v_mov_b32_e32 v15, v9 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v26, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5086,8 +5002,7 @@ define void @v_shuffle_v2i64_v8i64__10_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v16, v8 -; GFX942-NEXT: v_mov_b32_e32 v17, v9 +; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v26, v[14:17], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5142,8 +5057,7 @@ define void @v_shuffle_v2i64_v8i64__11_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v18, v8 -; GFX942-NEXT: v_mov_b32_e32 v19, v9 +; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v26, v[16:19], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5198,8 +5112,7 @@ define void @v_shuffle_v2i64_v8i64__12_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v20, v8 -; GFX942-NEXT: v_mov_b32_e32 v21, v9 +; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v26, v[18:21], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5254,8 +5167,7 @@ define void @v_shuffle_v2i64_v8i64__13_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 
-; GFX942-NEXT: v_mov_b32_e32 v22, v8 -; GFX942-NEXT: v_mov_b32_e32 v23, v9 +; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v26, v[20:23], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5310,8 +5222,7 @@ define void @v_shuffle_v2i64_v8i64__14_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v24, v8 -; GFX942-NEXT: v_mov_b32_e32 v25, v9 +; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v26, v[22:25], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5395,8 +5306,7 @@ define void @v_shuffle_v2i64_v8i64__0_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v10 -; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5440,8 +5350,7 @@ define void @v_shuffle_v2i64_v8i64__1_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v10 -; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5485,8 +5394,7 @@ define void @v_shuffle_v2i64_v8i64__2_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v10 -; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5530,8 +5438,7 @@ define 
void @v_shuffle_v2i64_v8i64__3_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v10 -; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5614,8 +5521,7 @@ define void @v_shuffle_v2i64_v8i64__5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v10 -; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5659,8 +5565,7 @@ define void @v_shuffle_v2i64_v8i64__6_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v10 -; GFX942-NEXT: v_mov_b32_e32 v15, v11 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5704,8 +5609,7 @@ define void @v_shuffle_v2i64_v8i64__7_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v14 -; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5798,8 +5702,7 @@ define void @v_shuffle_v2i64_v8i64__9_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v16, v10 -; GFX942-NEXT: v_mov_b32_e32 v17, v11 +; GFX942-NEXT: v_mov_b64_e32 
v[16:17], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v28, v[14:17], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5854,8 +5757,7 @@ define void @v_shuffle_v2i64_v8i64__10_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v18, v10 -; GFX942-NEXT: v_mov_b32_e32 v19, v11 +; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v28, v[16:19], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5910,8 +5812,7 @@ define void @v_shuffle_v2i64_v8i64__11_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v20, v10 -; GFX942-NEXT: v_mov_b32_e32 v21, v11 +; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v28, v[18:21], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -5966,8 +5867,7 @@ define void @v_shuffle_v2i64_v8i64__12_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v22, v10 -; GFX942-NEXT: v_mov_b32_e32 v23, v11 +; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v28, v[20:23], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6022,8 +5922,7 @@ define void @v_shuffle_v2i64_v8i64__13_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v24, v10 -; GFX942-NEXT: v_mov_b32_e32 v25, v11 +; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v28, v[22:25], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6078,8 +5977,7 @@ define void @v_shuffle_v2i64_v8i64__14_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: 
;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v26, v10 -; GFX942-NEXT: v_mov_b32_e32 v27, v11 +; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v28, v[24:27], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6163,8 +6061,7 @@ define void @v_shuffle_v2i64_v8i64__0_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v12 -; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6208,8 +6105,7 @@ define void @v_shuffle_v2i64_v8i64__1_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v12 -; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6253,8 +6149,7 @@ define void @v_shuffle_v2i64_v8i64__2_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v12 -; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6298,8 +6193,7 @@ define void @v_shuffle_v2i64_v8i64__3_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v12 -; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: 
s_setpc_b64 s[30:31] @@ -6343,8 +6237,7 @@ define void @v_shuffle_v2i64_v8i64__4_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v12 -; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6427,8 +6320,7 @@ define void @v_shuffle_v2i64_v8i64__6_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v12 -; GFX942-NEXT: v_mov_b32_e32 v15, v13 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6472,8 +6364,7 @@ define void @v_shuffle_v2i64_v8i64__7_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v14 -; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6566,8 +6457,7 @@ define void @v_shuffle_v2i64_v8i64__9_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v18, v12 -; GFX942-NEXT: v_mov_b32_e32 v19, v13 +; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v30, v[16:19], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6622,8 +6512,7 @@ define void @v_shuffle_v2i64_v8i64__10_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v20, v12 -; GFX942-NEXT: 
v_mov_b32_e32 v21, v13 +; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v30, v[18:21], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6678,8 +6567,7 @@ define void @v_shuffle_v2i64_v8i64__11_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v22, v12 -; GFX942-NEXT: v_mov_b32_e32 v23, v13 +; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v30, v[20:23], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6734,8 +6622,7 @@ define void @v_shuffle_v2i64_v8i64__12_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v24, v12 -; GFX942-NEXT: v_mov_b32_e32 v25, v13 +; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v30, v[22:25], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6790,8 +6677,7 @@ define void @v_shuffle_v2i64_v8i64__13_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v26, v12 -; GFX942-NEXT: v_mov_b32_e32 v27, v13 +; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v30, v[24:27], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6846,8 +6732,7 @@ define void @v_shuffle_v2i64_v8i64__14_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v28, v12 -; GFX942-NEXT: v_mov_b32_e32 v29, v13 +; GFX942-NEXT: v_mov_b64_e32 v[28:29], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v30, v[26:29], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6931,8 +6816,7 @@ define void @v_shuffle_v2i64_v8i64__0_7(ptr addrspace(1) inreg %ptr) 
{ ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -6976,8 +6860,7 @@ define void @v_shuffle_v2i64_v8i64__1_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7021,8 +6904,7 @@ define void @v_shuffle_v2i64_v8i64__2_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v14 -; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7066,8 +6948,7 @@ define void @v_shuffle_v2i64_v8i64__3_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v14 -; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7111,8 +6992,7 @@ define void @v_shuffle_v2i64_v8i64__4_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v14 -; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], 
s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7156,8 +7036,7 @@ define void @v_shuffle_v2i64_v8i64__5_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v14 -; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7240,8 +7119,7 @@ define void @v_shuffle_v2i64_v8i64__7_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v14 -; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7334,8 +7212,7 @@ define void @v_shuffle_v2i64_v8i64__9_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v20, v14 -; GFX942-NEXT: v_mov_b32_e32 v21, v15 +; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7390,8 +7267,7 @@ define void @v_shuffle_v2i64_v8i64__10_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v22, v14 -; GFX942-NEXT: v_mov_b32_e32 v23, v15 +; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7446,8 +7322,7 @@ define void @v_shuffle_v2i64_v8i64__11_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: 
v_mov_b32_e32 v24, v14 -; GFX942-NEXT: v_mov_b32_e32 v25, v15 +; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7502,8 +7377,7 @@ define void @v_shuffle_v2i64_v8i64__12_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v26, v14 -; GFX942-NEXT: v_mov_b32_e32 v27, v15 +; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7558,8 +7432,7 @@ define void @v_shuffle_v2i64_v8i64__13_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v28, v14 -; GFX942-NEXT: v_mov_b32_e32 v29, v15 +; GFX942-NEXT: v_mov_b64_e32 v[28:29], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7614,8 +7487,7 @@ define void @v_shuffle_v2i64_v8i64__14_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v30, v14 -; GFX942-NEXT: v_mov_b32_e32 v31, v15 +; GFX942-NEXT: v_mov_b64_e32 v[30:31], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -7944,8 +7816,7 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v14 -; GFX942-NEXT: v_mov_b32_e32 v1, v15 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8000,8 +7871,7 @@ define void 
@v_shuffle_v2i64_v8i64__9_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8046,8 +7916,7 @@ define void @v_shuffle_v2i64_v8i64__10_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8092,8 +7961,7 @@ define void @v_shuffle_v2i64_v8i64__11_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8138,8 +8006,7 @@ define void @v_shuffle_v2i64_v8i64__12_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v0 -; GFX942-NEXT: v_mov_b32_e32 v11, v1 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8184,8 +8051,7 @@ define void @v_shuffle_v2i64_v8i64__13_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v0 -; GFX942-NEXT: v_mov_b32_e32 v13, v1 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1] ; 
GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8230,8 +8096,7 @@ define void @v_shuffle_v2i64_v8i64__14_8(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v0 -; GFX942-NEXT: v_mov_b32_e32 v15, v1 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8326,8 +8191,7 @@ define void @v_shuffle_v2i64_v8i64__0_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8382,8 +8246,7 @@ define void @v_shuffle_v2i64_v8i64__1_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8438,8 +8301,7 @@ define void @v_shuffle_v2i64_v8i64__2_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v8 -; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8494,8 +8356,7 @@ define void @v_shuffle_v2i64_v8i64__3_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: 
v_mov_b32_e32 v8, v10 -; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8550,8 +8411,7 @@ define void @v_shuffle_v2i64_v8i64__4_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v12 -; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8606,8 +8466,7 @@ define void @v_shuffle_v2i64_v8i64__5_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v14 -; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8662,8 +8521,7 @@ define void @v_shuffle_v2i64_v8i64__6_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v16 -; GFX942-NEXT: v_mov_b32_e32 v15, v17 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8718,8 +8576,7 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v16 -; GFX942-NEXT: v_mov_b32_e32 v1, v17 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8804,8 +8661,7 @@ define void @v_shuffle_v2i64_v8i64__9_9(ptr 
addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8850,8 +8706,7 @@ define void @v_shuffle_v2i64_v8i64__10_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v2 -; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8896,8 +8751,7 @@ define void @v_shuffle_v2i64_v8i64__11_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8942,8 +8796,7 @@ define void @v_shuffle_v2i64_v8i64__12_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v2 -; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -8988,8 +8841,7 @@ define void @v_shuffle_v2i64_v8i64__13_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v2 -; GFX942-NEXT: v_mov_b32_e32 v13, v3 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3] ; GFX942-NEXT: global_store_dwordx4 
v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9034,8 +8886,7 @@ define void @v_shuffle_v2i64_v8i64__14_9(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v2 -; GFX942-NEXT: v_mov_b32_e32 v15, v3 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9130,8 +8981,7 @@ define void @v_shuffle_v2i64_v8i64__0_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9186,8 +9036,7 @@ define void @v_shuffle_v2i64_v8i64__1_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9242,8 +9091,7 @@ define void @v_shuffle_v2i64_v8i64__2_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v10 -; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9298,8 +9146,7 @@ define void @v_shuffle_v2i64_v8i64__3_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v12 -; 
GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9354,8 +9201,7 @@ define void @v_shuffle_v2i64_v8i64__4_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v14 -; GFX942-NEXT: v_mov_b32_e32 v11, v15 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9410,8 +9256,7 @@ define void @v_shuffle_v2i64_v8i64__5_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v16 -; GFX942-NEXT: v_mov_b32_e32 v13, v17 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9466,8 +9311,7 @@ define void @v_shuffle_v2i64_v8i64__6_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v18 -; GFX942-NEXT: v_mov_b32_e32 v15, v19 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9522,8 +9366,7 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v16 -; GFX942-NEXT: v_mov_b32_e32 v3, v17 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9568,8 +9411,7 @@ define void @v_shuffle_v2i64_v8i64__8_10(ptr addrspace(1) inreg %ptr) 
{ ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9654,8 +9496,7 @@ define void @v_shuffle_v2i64_v8i64__10_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9700,8 +9541,7 @@ define void @v_shuffle_v2i64_v8i64__11_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v4 -; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9746,8 +9586,7 @@ define void @v_shuffle_v2i64_v8i64__12_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9792,8 +9631,7 @@ define void @v_shuffle_v2i64_v8i64__13_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v4 -; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; 
GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9838,8 +9676,7 @@ define void @v_shuffle_v2i64_v8i64__14_10(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v4 -; GFX942-NEXT: v_mov_b32_e32 v15, v5 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9934,8 +9771,7 @@ define void @v_shuffle_v2i64_v8i64__0_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -9990,8 +9826,7 @@ define void @v_shuffle_v2i64_v8i64__1_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v10 -; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10046,8 +9881,7 @@ define void @v_shuffle_v2i64_v8i64__2_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v12 -; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10102,8 +9936,7 @@ define void @v_shuffle_v2i64_v8i64__3_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v14 -; GFX942-NEXT: 
v_mov_b32_e32 v9, v15 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10158,8 +9991,7 @@ define void @v_shuffle_v2i64_v8i64__4_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v16 -; GFX942-NEXT: v_mov_b32_e32 v11, v17 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10214,8 +10046,7 @@ define void @v_shuffle_v2i64_v8i64__5_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v18 -; GFX942-NEXT: v_mov_b32_e32 v13, v19 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10270,8 +10101,7 @@ define void @v_shuffle_v2i64_v8i64__6_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v20 -; GFX942-NEXT: v_mov_b32_e32 v15, v21 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[20:21] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10326,8 +10156,7 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v16 -; GFX942-NEXT: v_mov_b32_e32 v5, v17 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10372,8 +10201,7 @@ define void @v_shuffle_v2i64_v8i64__8_11(ptr addrspace(1) inreg %ptr) { ; 
GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10418,8 +10246,7 @@ define void @v_shuffle_v2i64_v8i64__9_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10504,8 +10331,7 @@ define void @v_shuffle_v2i64_v8i64__11_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v6 -; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10550,8 +10376,7 @@ define void @v_shuffle_v2i64_v8i64__12_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v6 -; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10596,8 +10421,7 @@ define void @v_shuffle_v2i64_v8i64__13_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], 
s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10642,8 +10466,7 @@ define void @v_shuffle_v2i64_v8i64__14_11(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v6 -; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10738,8 +10561,7 @@ define void @v_shuffle_v2i64_v8i64__0_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v10 -; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10794,8 +10616,7 @@ define void @v_shuffle_v2i64_v8i64__1_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v12 -; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10850,8 +10671,7 @@ define void @v_shuffle_v2i64_v8i64__2_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v14 -; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10906,8 +10726,7 @@ define void @v_shuffle_v2i64_v8i64__3_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v16 -; 
GFX942-NEXT: v_mov_b32_e32 v9, v17 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -10962,8 +10781,7 @@ define void @v_shuffle_v2i64_v8i64__4_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v18 -; GFX942-NEXT: v_mov_b32_e32 v11, v19 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11018,8 +10836,7 @@ define void @v_shuffle_v2i64_v8i64__5_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v20 -; GFX942-NEXT: v_mov_b32_e32 v13, v21 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[20:21] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11074,8 +10891,7 @@ define void @v_shuffle_v2i64_v8i64__6_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v22 -; GFX942-NEXT: v_mov_b32_e32 v15, v23 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[22:23] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11130,8 +10946,7 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v16 -; GFX942-NEXT: v_mov_b32_e32 v7, v17 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11176,8 +10991,7 @@ define void @v_shuffle_v2i64_v8i64__8_12(ptr addrspace(1) 
inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11222,8 +11036,7 @@ define void @v_shuffle_v2i64_v8i64__9_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11268,8 +11081,7 @@ define void @v_shuffle_v2i64_v8i64__10_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v8 -; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11354,8 +11166,7 @@ define void @v_shuffle_v2i64_v8i64__12_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v8 -; GFX942-NEXT: v_mov_b32_e32 v11, v9 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11400,8 +11211,7 @@ define void @v_shuffle_v2i64_v8i64__13_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v8 -; GFX942-NEXT: v_mov_b32_e32 v13, v9 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, 
v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11446,8 +11256,7 @@ define void @v_shuffle_v2i64_v8i64__14_12(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v8 -; GFX942-NEXT: v_mov_b32_e32 v15, v9 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11542,8 +11351,7 @@ define void @v_shuffle_v2i64_v8i64__0_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v12 -; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11598,8 +11406,7 @@ define void @v_shuffle_v2i64_v8i64__1_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11654,8 +11461,7 @@ define void @v_shuffle_v2i64_v8i64__2_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v16 -; GFX942-NEXT: v_mov_b32_e32 v7, v17 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11710,8 +11516,7 @@ define void @v_shuffle_v2i64_v8i64__3_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, 
v18 -; GFX942-NEXT: v_mov_b32_e32 v9, v19 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11766,8 +11571,7 @@ define void @v_shuffle_v2i64_v8i64__4_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v20 -; GFX942-NEXT: v_mov_b32_e32 v11, v21 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[20:21] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11822,8 +11626,7 @@ define void @v_shuffle_v2i64_v8i64__5_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v22 -; GFX942-NEXT: v_mov_b32_e32 v13, v23 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[22:23] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11878,8 +11681,7 @@ define void @v_shuffle_v2i64_v8i64__6_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v24 -; GFX942-NEXT: v_mov_b32_e32 v15, v25 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[24:25] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11934,8 +11736,7 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v16 -; GFX942-NEXT: v_mov_b32_e32 v9, v17 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -11980,8 +11781,7 @@ define void @v_shuffle_v2i64_v8i64__8_13(ptr 
addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v10 -; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12026,8 +11826,7 @@ define void @v_shuffle_v2i64_v8i64__9_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v10 -; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12072,8 +11871,7 @@ define void @v_shuffle_v2i64_v8i64__10_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v10 -; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12118,8 +11916,7 @@ define void @v_shuffle_v2i64_v8i64__11_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v10 -; GFX942-NEXT: v_mov_b32_e32 v9, v11 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12204,8 +12001,7 @@ define void @v_shuffle_v2i64_v8i64__13_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v10 -; GFX942-NEXT: v_mov_b32_e32 v13, v11 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[10:11] ; 
GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12250,8 +12046,7 @@ define void @v_shuffle_v2i64_v8i64__14_13(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v10 -; GFX942-NEXT: v_mov_b32_e32 v15, v11 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12346,8 +12141,7 @@ define void @v_shuffle_v2i64_v8i64__0_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12402,8 +12196,7 @@ define void @v_shuffle_v2i64_v8i64__1_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v16 -; GFX942-NEXT: v_mov_b32_e32 v5, v17 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12458,8 +12251,7 @@ define void @v_shuffle_v2i64_v8i64__2_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v18 -; GFX942-NEXT: v_mov_b32_e32 v7, v19 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12514,8 +12306,7 @@ define void @v_shuffle_v2i64_v8i64__3_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: 
s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v20 -; GFX942-NEXT: v_mov_b32_e32 v9, v21 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[20:21] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12570,8 +12361,7 @@ define void @v_shuffle_v2i64_v8i64__4_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v22 -; GFX942-NEXT: v_mov_b32_e32 v11, v23 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[22:23] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12626,8 +12416,7 @@ define void @v_shuffle_v2i64_v8i64__5_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v24 -; GFX942-NEXT: v_mov_b32_e32 v13, v25 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[24:25] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12682,8 +12471,7 @@ define void @v_shuffle_v2i64_v8i64__6_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v26 -; GFX942-NEXT: v_mov_b32_e32 v15, v27 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[26:27] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12738,8 +12526,7 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v16 -; GFX942-NEXT: v_mov_b32_e32 v11, v17 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12784,8 +12571,7 @@ 
define void @v_shuffle_v2i64_v8i64__8_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v12 -; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12830,8 +12616,7 @@ define void @v_shuffle_v2i64_v8i64__9_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v12 -; GFX942-NEXT: v_mov_b32_e32 v5, v13 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12876,8 +12661,7 @@ define void @v_shuffle_v2i64_v8i64__10_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v12 -; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12922,8 +12706,7 @@ define void @v_shuffle_v2i64_v8i64__11_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v12 -; GFX942-NEXT: v_mov_b32_e32 v9, v13 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -12968,8 +12751,7 @@ define void @v_shuffle_v2i64_v8i64__12_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v12 -; GFX942-NEXT: v_mov_b32_e32 v11, v13 +; GFX942-NEXT: 
v_mov_b64_e32 v[10:11], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13054,8 +12836,7 @@ define void @v_shuffle_v2i64_v8i64__14_14(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v12 -; GFX942-NEXT: v_mov_b32_e32 v15, v13 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13150,8 +12931,7 @@ define void @v_shuffle_v2i64_v8i64__0_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:17] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v16 -; GFX942-NEXT: v_mov_b32_e32 v3, v17 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13206,8 +12986,7 @@ define void @v_shuffle_v2i64_v8i64__1_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v18 -; GFX942-NEXT: v_mov_b32_e32 v5, v19 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[18:19] ; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13262,8 +13041,7 @@ define void @v_shuffle_v2i64_v8i64__2_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:21] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v20 -; GFX942-NEXT: v_mov_b32_e32 v7, v21 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[20:21] ; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13318,8 +13096,7 @@ define void @v_shuffle_v2i64_v8i64__3_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:23] ; 
GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v22 -; GFX942-NEXT: v_mov_b32_e32 v9, v23 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[22:23] ; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13374,8 +13151,7 @@ define void @v_shuffle_v2i64_v8i64__4_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[10:25] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v24 -; GFX942-NEXT: v_mov_b32_e32 v11, v25 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[24:25] ; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13430,8 +13206,7 @@ define void @v_shuffle_v2i64_v8i64__5_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v26 -; GFX942-NEXT: v_mov_b32_e32 v13, v27 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[26:27] ; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13486,8 +13261,7 @@ define void @v_shuffle_v2i64_v8i64__6_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[14:29] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v28 -; GFX942-NEXT: v_mov_b32_e32 v15, v29 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[28:29] ; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13542,8 +13316,7 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v16 -; GFX942-NEXT: v_mov_b32_e32 v13, v17 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[16:17] ; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] @@ -13588,8 +13361,7 @@ define void @v_shuffle_v2i64_v8i64__8_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v14 -; GFX942-NEXT: v_mov_b32_e32 v3, v15 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13634,8 +13406,7 @@ define void @v_shuffle_v2i64_v8i64__9_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13680,8 +13451,7 @@ define void @v_shuffle_v2i64_v8i64__10_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v14 -; GFX942-NEXT: v_mov_b32_e32 v7, v15 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13726,8 +13496,7 @@ define void @v_shuffle_v2i64_v8i64__11_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v14 -; GFX942-NEXT: v_mov_b32_e32 v9, v15 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13772,8 +13541,7 @@ define void @v_shuffle_v2i64_v8i64__12_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v14 -; GFX942-NEXT: 
v_mov_b32_e32 v11, v15 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13818,8 +13586,7 @@ define void @v_shuffle_v2i64_v8i64__13_15(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v16, 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v14 -; GFX942-NEXT: v_mov_b32_e32 v13, v15 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -13957,8 +13724,7 @@ define void @s_shuffle_v2i64_v8i64__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14042,8 +13808,7 @@ define void @s_shuffle_v2i64_v8i64__3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14131,8 +13896,7 @@ define void @s_shuffle_v2i64_v8i64__5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14221,8 +13985,7 @@ define void @s_shuffle_v2i64_v8i64__7_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ 
-14280,8 +14043,7 @@ define void @s_shuffle_v2i64_v8i64__9_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14367,8 +14129,7 @@ define void @s_shuffle_v2i64_v8i64__11_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14458,8 +14219,7 @@ define void @s_shuffle_v2i64_v8i64__13_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14550,8 +14310,7 @@ define void @s_shuffle_v2i64_v8i64__15_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14610,10 +14369,8 @@ define void @s_shuffle_v2i64_v8i64__15_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14668,8 +14425,7 @@ define void @s_shuffle_v2i64_v8i64__15_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; 
GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14750,10 +14506,8 @@ define void @s_shuffle_v2i64_v8i64__15_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14808,8 +14562,7 @@ define void @s_shuffle_v2i64_v8i64__15_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14894,10 +14647,8 @@ define void @s_shuffle_v2i64_v8i64__15_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s30 -; GFX942-NEXT: s_mov_b32 s9, s31 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[30:31] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -14984,8 +14735,7 @@ define void @s_shuffle_v2i64_v8i64__15_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15094,10 +14844,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s30 -; GFX942-NEXT: s_mov_b32 s9, s31 -; GFX942-NEXT: s_mov_b32 
s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[30:31] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15212,8 +14960,7 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s30 -; GFX942-NEXT: s_mov_b32 s13, s31 +; GFX942-NEXT: s_mov_b64 s[12:13], s[30:31] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -15270,10 +15017,8 @@ define void @s_shuffle_v2i64_v8i64__15_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15286,18 +15031,43 @@ define void @s_shuffle_v2i64_v8i64__15_8() { } define void @s_shuffle_v2i64_v8i64__15_9() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s22 -; GFX9-NEXT: s_mov_b32 s9, s23 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_9: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_9: +; GFX90A: ; %bb.0: 
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15342,10 +15112,8 @@ define void @s_shuffle_v2i64_v8i64__15_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15358,18 +15126,43 @@ define void @s_shuffle_v2i64_v8i64__15_10() { } define void @s_shuffle_v2i64_v8i64__15_11() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s18 -; GFX9-NEXT: s_mov_b32 s9, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_11: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_11: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15378,20 +15171,48 @@ define void @s_shuffle_v2i64_v8i64__15_11() { } define void @s_shuffle_v2i64_v8i64__15_12() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s18 -; GFX9-NEXT: s_mov_b32 s9, s19 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_12: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: 
s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_12: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -15436,8 +15257,7 @@ define void @s_shuffle_v2i64_v8i64__15_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15486,10 +15306,8 @@ define void @s_shuffle_v2i64_v8i64__15_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; 
GFX942-NEXT: ;;#ASMEND @@ -15538,8 +15356,7 @@ define void @s_shuffle_v2i64_v8i64__15_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -15586,8 +15403,7 @@ define void @s_shuffle_v2i64_v8i64__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15599,18 +15415,43 @@ define void @s_shuffle_v2i64_v8i64__u_0() { } define void @s_shuffle_v2i64_v8i64__0_0() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15654,10 +15495,8 @@ define void @s_shuffle_v2i64_v8i64__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15669,18 +15508,43 @@ define void @s_shuffle_v2i64_v8i64__1_0() { } define void @s_shuffle_v2i64_v8i64__2_0() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: s_mov_b32 s11, s5 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_0: +; GFX90A: ; %bb.0: +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -15724,10 +15588,8 @@ define void @s_shuffle_v2i64_v8i64__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15775,8 +15637,7 @@ define void @s_shuffle_v2i64_v8i64__4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15824,10 +15685,8 @@ define void @s_shuffle_v2i64_v8i64__5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15875,8 +15734,7 @@ define void @s_shuffle_v2i64_v8i64__6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -15926,10 +15784,8 @@ define void @s_shuffle_v2i64_v8i64__7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -15973,8 +15829,7 @@ define void @s_shuffle_v2i64_v8i64__8_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16032,10 +15887,8 @@ define void @s_shuffle_v2i64_v8i64__9_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16136,8 +15989,7 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16196,10 +16048,8 @@ define void @s_shuffle_v2i64_v8i64__11_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16262,8 +16112,7 @@ define void @s_shuffle_v2i64_v8i64__12_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16328,10 +16177,8 @@ define void @s_shuffle_v2i64_v8i64__13_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16390,8 +16237,7 @@ define void @s_shuffle_v2i64_v8i64__14_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s18, s0 -; GFX942-NEXT: s_mov_b32 s19, s1 +; GFX942-NEXT: s_mov_b64 s[18:19], s[0:1] ; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] ; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART @@ -16486,18 +16332,43 @@ define void @s_shuffle_v2i64_v8i64__0_1() { } define void @s_shuffle_v2i64_v8i64__1_1() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16505,18 +16376,43 @@ define void @s_shuffle_v2i64_v8i64__1_1() { } define void @s_shuffle_v2i64_v8i64__2_1() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s6 -; GFX9-NEXT: s_mov_b32 s11, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16524,18 +16420,43 @@ define void @s_shuffle_v2i64_v8i64__2_1() { } define void @s_shuffle_v2i64_v8i64__3_1() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16579,8 +16500,7 @@ define void @s_shuffle_v2i64_v8i64__4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16592,18 +16512,43 @@ define void @s_shuffle_v2i64_v8i64__4_1() { } define void @s_shuffle_v2i64_v8i64__5_1() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s18 -; GFX9-NEXT: s_mov_b32 s9, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16647,8 +16592,7 @@ define void @s_shuffle_v2i64_v8i64__6_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -16662,18 +16606,43 @@ define void @s_shuffle_v2i64_v8i64__6_1() { } define void @s_shuffle_v2i64_v8i64__7_1() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s22 -; GFX9-NEXT: s_mov_b32 s9, s23 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -16763,8 +16732,7 @@ define void @s_shuffle_v2i64_v8i64__9_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16865,8 +16833,7 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16921,8 +16888,7 @@ define void @s_shuffle_v2i64_v8i64__11_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -16985,8 +16951,7 @@ define void @s_shuffle_v2i64_v8i64__12_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17047,8 +17012,7 @@ define void @s_shuffle_v2i64_v8i64__13_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17107,8 +17071,7 @@ define void @s_shuffle_v2i64_v8i64__14_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s18, s2 -; GFX942-NEXT: s_mov_b32 s19, s3 +; GFX942-NEXT: s_mov_b64 s[18:19], s[2:3] ; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] ; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART @@ -17155,8 +17118,7 @@ define void @s_shuffle_v2i64_v8i64__u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; 
GFX942-NEXT: ;;#ASMEND @@ -17168,18 +17130,43 @@ define void @s_shuffle_v2i64_v8i64__u_2() { } define void @s_shuffle_v2i64_v8i64__0_2() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17223,10 +17210,8 @@ define void @s_shuffle_v2i64_v8i64__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17238,18 +17223,43 @@ define void @s_shuffle_v2i64_v8i64__1_2() { } define void @s_shuffle_v2i64_v8i64__2_2() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", 
"=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -17293,10 +17303,8 @@ define void @s_shuffle_v2i64_v8i64__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17344,8 +17352,7 @@ define void @s_shuffle_v2i64_v8i64__4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17393,10 +17400,8 @@ define void @s_shuffle_v2i64_v8i64__5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17444,8 +17449,7 @@ define void @s_shuffle_v2i64_v8i64__6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -17495,10 +17499,8 @@ define void @s_shuffle_v2i64_v8i64__7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 
s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17542,8 +17544,7 @@ define void @s_shuffle_v2i64_v8i64__8_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17623,10 +17624,8 @@ define void @s_shuffle_v2i64_v8i64__9_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17708,8 +17707,7 @@ define void @s_shuffle_v2i64_v8i64__10_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17796,10 +17794,8 @@ define void @s_shuffle_v2i64_v8i64__11_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17858,8 +17854,7 @@ define void @s_shuffle_v2i64_v8i64__12_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; 
GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -17940,10 +17935,8 @@ define void @s_shuffle_v2i64_v8i64__13_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18002,8 +17995,7 @@ define void @s_shuffle_v2i64_v8i64__14_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s22, s4 -; GFX942-NEXT: s_mov_b32 s23, s5 +; GFX942-NEXT: s_mov_b64 s[22:23], s[4:5] ; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] ; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART @@ -18058,37 +18050,87 @@ define void @s_shuffle_v2i64_v8i64__u_3() { } define void @s_shuffle_v2i64_v8i64__0_3() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__1_3() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: 
;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__1_3() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18136,18 +18178,43 @@ define void @s_shuffle_v2i64_v8i64__2_3() { } define void @s_shuffle_v2i64_v8i64__3_3() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 
s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18191,8 +18258,7 @@ define void @s_shuffle_v2i64_v8i64__4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18204,18 +18270,43 @@ define void @s_shuffle_v2i64_v8i64__4_3() { } define void @s_shuffle_v2i64_v8i64__5_3() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18259,8 +18350,7 @@ define void @s_shuffle_v2i64_v8i64__6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -18274,18 +18364,43 @@ define void @s_shuffle_v2i64_v8i64__6_3() { } define void @s_shuffle_v2i64_v8i64__7_3() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s18 -; GFX9-NEXT: s_mov_b32 s9, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; 
GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18375,8 +18490,7 @@ define void @s_shuffle_v2i64_v8i64__9_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18458,8 +18572,7 @@ define void @s_shuffle_v2i64_v8i64__10_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18520,8 +18633,7 @@ define void @s_shuffle_v2i64_v8i64__11_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 
s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18580,8 +18692,7 @@ define void @s_shuffle_v2i64_v8i64__12_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18636,8 +18747,7 @@ define void @s_shuffle_v2i64_v8i64__13_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18696,8 +18806,7 @@ define void @s_shuffle_v2i64_v8i64__14_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s22, s6 -; GFX942-NEXT: s_mov_b32 s23, s7 +; GFX942-NEXT: s_mov_b64 s[22:23], s[6:7] ; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] ; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART @@ -18744,8 +18853,7 @@ define void @s_shuffle_v2i64_v8i64__u_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s8 -; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -18757,18 +18865,43 @@ define void @s_shuffle_v2i64_v8i64__u_4() { } define void @s_shuffle_v2i64_v8i64__0_4() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s16 -; GFX9-NEXT: s_mov_b32 s11, s17 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; 
GFX900-LABEL: s_shuffle_v2i64_v8i64__0_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18776,20 +18909,48 @@ define void @s_shuffle_v2i64_v8i64__0_4() { } define void @s_shuffle_v2i64_v8i64__1_4() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18797,18 +18958,43 @@ define void @s_shuffle_v2i64_v8i64__1_4() { } define void @s_shuffle_v2i64_v8i64__2_4() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18816,90 +19002,145 @@ define void @s_shuffle_v2i64_v8i64__2_4() { } define void @s_shuffle_v2i64_v8i64__3_4() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__4_4() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) ret void } -define void @s_shuffle_v2i64_v8i64__4_4() { -; GFX900-LABEL: s_shuffle_v2i64_v8i64__4_4: +define void @s_shuffle_v2i64_v8i64__5_4() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s14, s12 -; GFX900-NEXT: s_mov_b32 s15, s13 -; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v2i64_v8i64__4_4: +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; 
GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s14, s12 -; GFX90A-NEXT: s_mov_b32 s15, s13 -; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4: +; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s8 -; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__5_4() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -18943,8 +19184,7 @@ define void @s_shuffle_v2i64_v8i64__6_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s14, s8 -; GFX942-NEXT: s_mov_b32 s15, s9 +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -18958,20 +19198,48 @@ define void @s_shuffle_v2i64_v8i64__6_4() { } define void @s_shuffle_v2i64_v8i64__7_4() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s18 -; GFX9-NEXT: s_mov_b32 s9, s19 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] 
+; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19011,8 +19279,7 @@ define void @s_shuffle_v2i64_v8i64__8_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s8 -; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19096,10 +19363,8 @@ define void @s_shuffle_v2i64_v8i64__9_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19160,8 +19425,7 @@ define void @s_shuffle_v2i64_v8i64__10_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19246,10 +19510,8 @@ define void @s_shuffle_v2i64_v8i64__11_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19336,8 +19598,7 @@ define void @s_shuffle_v2i64_v8i64__12_4() { ; GFX942-NEXT: ;;#ASMSTART 
; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19422,10 +19683,8 @@ define void @s_shuffle_v2i64_v8i64__13_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; GFX942-NEXT: s_mov_b32 s9, s27 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19512,8 +19771,7 @@ define void @s_shuffle_v2i64_v8i64__14_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s26, s8 -; GFX942-NEXT: s_mov_b32 s27, s9 +; GFX942-NEXT: s_mov_b64 s[26:27], s[8:9] ; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25] ; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX942-NEXT: ;;#ASMSTART @@ -19572,18 +19830,43 @@ define void @s_shuffle_v2i64_v8i64__u_5() { } define void @s_shuffle_v2i64_v8i64__0_5() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s18 -; GFX9-NEXT: s_mov_b32 s11, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: 
s_shuffle_v2i64_v8i64__0_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19627,8 +19910,7 @@ define void @s_shuffle_v2i64_v8i64__1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19640,18 +19922,43 @@ define void @s_shuffle_v2i64_v8i64__1_5() { } define void @s_shuffle_v2i64_v8i64__2_5() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; 
GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -19695,8 +20002,7 @@ define void @s_shuffle_v2i64_v8i64__3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19788,8 +20094,7 @@ define void @s_shuffle_v2i64_v8i64__5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -19837,8 +20142,7 @@ define void @s_shuffle_v2i64_v8i64__6_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, 
s11 +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -19888,8 +20192,7 @@ define void @s_shuffle_v2i64_v8i64__7_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20013,8 +20316,7 @@ define void @s_shuffle_v2i64_v8i64__9_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20069,8 +20371,7 @@ define void @s_shuffle_v2i64_v8i64__10_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20151,8 +20452,7 @@ define void @s_shuffle_v2i64_v8i64__11_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20233,8 +20533,7 @@ define void @s_shuffle_v2i64_v8i64__12_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20315,8 +20614,7 @@ define void @s_shuffle_v2i64_v8i64__13_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20397,8 +20695,7 @@ define void @s_shuffle_v2i64_v8i64__14_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s26, s10 -; GFX942-NEXT: s_mov_b32 s27, s11 +; GFX942-NEXT: s_mov_b64 s[26:27], s[10:11] ; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25] ; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX942-NEXT: ;;#ASMSTART @@ -20445,8 +20742,7 @@ define void @s_shuffle_v2i64_v8i64__u_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20458,18 +20754,43 @@ define void @s_shuffle_v2i64_v8i64__u_6() { } define void @s_shuffle_v2i64_v8i64__0_6() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s20 -; GFX9-NEXT: s_mov_b32 s11, s21 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20511,35 +20832,58 @@ define void @s_shuffle_v2i64_v8i64__1_6() { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__2_6() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__2_6() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s16 -; GFX9-NEXT: s_mov_b32 s11, s17 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -20583,10 +20927,8 @@ define void @s_shuffle_v2i64_v8i64__3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20634,8 +20976,7 @@ define void @s_shuffle_v2i64_v8i64__4_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20683,10 +21024,8 @@ define void @s_shuffle_v2i64_v8i64__5_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20734,8 +21073,7 @@ define void @s_shuffle_v2i64_v8i64__6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -20785,10 +21123,8 @@ define void @s_shuffle_v2i64_v8i64__7_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20832,8 +21168,7 @@ define void @s_shuffle_v2i64_v8i64__8_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -20941,10 +21276,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -21005,8 +21338,7 @@ define void @s_shuffle_v2i64_v8i64__10_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -21115,10 +21447,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -21229,8 +21559,7 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -21339,10 +21668,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; GFX942-NEXT: s_mov_b32 s9, s27 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -21457,8 +21784,7 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s30, s12 -; GFX942-NEXT: s_mov_b32 s31, s13 +; GFX942-NEXT: s_mov_b64 s[30:31], s[12:13] ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] ; GFX942-NEXT: ;;#ASMSTART @@ -21524,18 +21850,43 @@ define void @s_shuffle_v2i64_v8i64__u_7() { } define void @s_shuffle_v2i64_v8i64__0_7() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s22 -; GFX9-NEXT: s_mov_b32 s11, s23 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -21579,8 +21930,7 @@ define void @s_shuffle_v2i64_v8i64__1_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -21594,18 +21944,43 @@ define void @s_shuffle_v2i64_v8i64__1_7() { } define void @s_shuffle_v2i64_v8i64__2_7() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s18 -; GFX9-NEXT: s_mov_b32 s11, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) @@ -21649,8 +22024,7 @@ define void @s_shuffle_v2i64_v8i64__3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -21700,8 +22074,7 @@ define void @s_shuffle_v2i64_v8i64__4_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -21749,8 +22122,7 @@ define void @s_shuffle_v2i64_v8i64__5_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -21845,8 +22217,7 @@ define void @s_shuffle_v2i64_v8i64__7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; 
GFX942-NEXT: ;;#ASMSTART @@ -22001,8 +22372,7 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s18 -; GFX942-NEXT: s_mov_b32 s13, s19 +; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -22065,8 +22435,7 @@ define void @s_shuffle_v2i64_v8i64__10_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22175,8 +22544,7 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s22 -; GFX942-NEXT: s_mov_b32 s13, s23 +; GFX942-NEXT: s_mov_b64 s[12:13], s[22:23] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -22289,8 +22657,7 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22399,8 +22766,7 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s26 -; GFX942-NEXT: s_mov_b32 s13, s27 +; GFX942-NEXT: s_mov_b64 s[12:13], s[26:27] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -22517,8 +22883,7 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s30, s14 -; GFX942-NEXT: s_mov_b32 s31, s15 +; GFX942-NEXT: s_mov_b64 s[30:31], s[14:15] ; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] ; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] ; GFX942-NEXT: ;;#ASMSTART @@ -22625,8 +22990,7 @@ define void @s_shuffle_v2i64_v8i64__1_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22710,8 +23074,7 @@ define void @s_shuffle_v2i64_v8i64__3_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22799,8 +23162,7 @@ define void @s_shuffle_v2i64_v8i64__5_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22889,8 +23251,7 @@ define void @s_shuffle_v2i64_v8i64__7_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22952,10 +23313,8 @@ define void @s_shuffle_v2i64_v8i64__9_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -22968,18 +23327,43 @@ define void @s_shuffle_v2i64_v8i64__9_8() { } define void @s_shuffle_v2i64_v8i64__10_8() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_8: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: s_mov_b32 s11, s5 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_8: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_8: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_8: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23024,10 +23408,8 @@ define void @s_shuffle_v2i64_v8i64__11_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23076,8 +23458,7 @@ define void @s_shuffle_v2i64_v8i64__12_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23126,10 +23507,8 @@ define void @s_shuffle_v2i64_v8i64__13_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23178,8 +23557,7 @@ define void @s_shuffle_v2i64_v8i64__14_8() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -23277,8 +23655,7 @@ define void @s_shuffle_v2i64_v8i64__0_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23333,8 +23710,7 @@ define void @s_shuffle_v2i64_v8i64__1_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23389,8 +23765,7 @@ define void @s_shuffle_v2i64_v8i64__2_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23491,8 +23866,7 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23573,8 +23947,7 @@ define void @s_shuffle_v2i64_v8i64__4_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23656,8 +24029,7 @@ define void @s_shuffle_v2i64_v8i64__5_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23772,8 +24144,7 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s18 -; GFX942-NEXT: s_mov_b32 s15, s19 +; GFX942-NEXT: s_mov_b64 s[14:15], s[18:19] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -23836,8 +24207,7 @@ define void @s_shuffle_v2i64_v8i64__7_9() 
{ ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s26 -; GFX942-NEXT: s_mov_b32 s9, s27 +; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -23891,18 +24261,43 @@ define void @s_shuffle_v2i64_v8i64__8_9() { } define void @s_shuffle_v2i64_v8i64__9_9() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_9: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_9: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; 
def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23911,18 +24306,43 @@ define void @s_shuffle_v2i64_v8i64__9_9() { } define void @s_shuffle_v2i64_v8i64__10_9() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s6 -; GFX9-NEXT: s_mov_b32 s11, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_9: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_9: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23931,18 +24351,43 @@ define void @s_shuffle_v2i64_v8i64__10_9() { } define void 
@s_shuffle_v2i64_v8i64__11_9() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_9: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_9: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -23987,8 +24432,7 @@ define void @s_shuffle_v2i64_v8i64__12_9() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24001,18 +24445,43 @@ define void @s_shuffle_v2i64_v8i64__12_9() { } define void @s_shuffle_v2i64_v8i64__13_9() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_9: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s18 -; GFX9-NEXT: s_mov_b32 s9, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_9: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_9: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_9: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24057,8 +24526,7 @@ define void @s_shuffle_v2i64_v8i64__14_9() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -24105,8 +24573,7 @@ define void @s_shuffle_v2i64_v8i64__u_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24161,8 +24628,7 @@ define void @s_shuffle_v2i64_v8i64__0_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24221,10 +24687,8 @@ define void @s_shuffle_v2i64_v8i64__1_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24279,8 +24743,7 @@ define void @s_shuffle_v2i64_v8i64__2_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24339,10 +24802,8 @@ define void @s_shuffle_v2i64_v8i64__3_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: 
s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24423,8 +24884,7 @@ define void @s_shuffle_v2i64_v8i64__4_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24501,13 +24961,11 @@ define void @s_shuffle_v2i64_v8i64__5_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24616,8 +25074,7 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s20 -; GFX942-NEXT: s_mov_b32 s15, s21 +; GFX942-NEXT: s_mov_b64 s[14:15], s[20:21] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -24734,10 +25191,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24756,18 +25211,43 @@ define void 
@s_shuffle_v2i64_v8i64__7_10() { } define void @s_shuffle_v2i64_v8i64__8_10() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_10: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_10: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_10: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24812,10 +25292,8 @@ define void @s_shuffle_v2i64_v8i64__9_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 
-; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24828,18 +25306,43 @@ define void @s_shuffle_v2i64_v8i64__9_10() { } define void @s_shuffle_v2i64_v8i64__10_10() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_10: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_10: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_10: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_10: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = 
shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -24884,10 +25387,8 @@ define void @s_shuffle_v2i64_v8i64__11_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24936,8 +25437,7 @@ define void @s_shuffle_v2i64_v8i64__12_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -24986,10 +25486,8 @@ define void @s_shuffle_v2i64_v8i64__13_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25038,8 +25536,7 @@ define void @s_shuffle_v2i64_v8i64__14_10() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -25137,8 +25634,7 @@ define void @s_shuffle_v2i64_v8i64__0_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25239,8 +25735,7 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25295,8 +25790,7 @@ define void @s_shuffle_v2i64_v8i64__2_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25378,8 +25872,7 @@ define void @s_shuffle_v2i64_v8i64__3_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25466,8 +25959,7 @@ define void @s_shuffle_v2i64_v8i64__4_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25522,8 +26014,7 @@ define void @s_shuffle_v2i64_v8i64__5_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25632,8 +26123,7 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s22 -; GFX942-NEXT: s_mov_b32 s15, s23 +; GFX942-NEXT: s_mov_b64 
s[14:15], s[22:23] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -25696,8 +26186,7 @@ define void @s_shuffle_v2i64_v8i64__7_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25710,18 +26199,43 @@ define void @s_shuffle_v2i64_v8i64__7_11() { } define void @s_shuffle_v2i64_v8i64__8_11() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_11: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_11: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 
s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25730,18 +26244,43 @@ define void @s_shuffle_v2i64_v8i64__8_11() { } define void @s_shuffle_v2i64_v8i64__9_11() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_11: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_11: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = 
call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25791,18 +26330,43 @@ define void @s_shuffle_v2i64_v8i64__10_11() { } define void @s_shuffle_v2i64_v8i64__11_11() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_11: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_11: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25847,8 +26411,7 @@ define void @s_shuffle_v2i64_v8i64__12_11() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -25861,18 +26424,43 @@ define void @s_shuffle_v2i64_v8i64__12_11() { } define void @s_shuffle_v2i64_v8i64__13_11() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_11: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_11: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_11: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_11: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x 
i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -25917,8 +26505,7 @@ define void @s_shuffle_v2i64_v8i64__14_11() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -25965,8 +26552,7 @@ define void @s_shuffle_v2i64_v8i64__u_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s8 -; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26021,8 +26607,7 @@ define void @s_shuffle_v2i64_v8i64__0_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26081,10 +26666,8 @@ define void @s_shuffle_v2i64_v8i64__1_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26139,8 +26722,7 @@ define void @s_shuffle_v2i64_v8i64__2_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26199,10 +26781,8 @@ define void 
@s_shuffle_v2i64_v8i64__3_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26283,8 +26863,7 @@ define void @s_shuffle_v2i64_v8i64__4_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26361,13 +26940,11 @@ define void @s_shuffle_v2i64_v8i64__5_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26476,8 +27053,7 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s24 -; GFX942-NEXT: s_mov_b32 s15, s25 +; GFX942-NEXT: s_mov_b64 s[14:15], s[24:25] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -26539,159 +27115,263 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_12: +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_12: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-NEXT: s_mov_b64 exec, s[4:5] +; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 +; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[36:51] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s44 +; GFX90A-NEXT: s_mov_b32 s11, s45 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 +; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 +; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-NEXT: s_mov_b64 exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: 
;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %vec1 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__8_12() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_12: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_12: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def 
$0", "=s"() + %vec1 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__9_12() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_12: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_12: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %vec1 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__10_12() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_12: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_12: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %vec1 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__11_12() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_12: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_12: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: s_xor_saveexec_b64 
s[4:5], -1 -; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX90A-NEXT: s_mov_b64 exec, s[4:5] -; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 -; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[36:51] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s44 -; GFX90A-NEXT: s_mov_b32 s11, s45 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 -; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 -; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 -; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX90A-NEXT: s_mov_b64 exec, s[4:5] -; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12: +; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_12: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v0, s30, 0 -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 -; GFX942-NEXT: 
;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] -; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s24 -; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %vec1 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__8_12() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s16 -; GFX9-NEXT: s_mov_b32 s11, s17 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %vec1 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__9_12() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: 
s_mov_b32 s9, s7 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %vec1 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__10_12() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %vec1 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__11_12() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26736,8 +27416,7 @@ define void @s_shuffle_v2i64_v8i64__12_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s8 -; GFX942-NEXT: s_mov_b32 s11, s9 +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[8:9] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26750,20 +27429,48 @@ define void @s_shuffle_v2i64_v8i64__12_12() { } define void @s_shuffle_v2i64_v8i64__13_12() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_12: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_12: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_12: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_12: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: 
s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -26808,8 +27515,7 @@ define void @s_shuffle_v2i64_v8i64__14_12() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s8 -; GFX942-NEXT: s_mov_b32 s15, s9 +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -26911,8 +27617,7 @@ define void @s_shuffle_v2i64_v8i64__0_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -26975,8 +27680,7 @@ define void @s_shuffle_v2i64_v8i64__1_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27037,8 +27741,7 @@ define void @s_shuffle_v2i64_v8i64__2_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27097,8 +27800,7 @@ define void @s_shuffle_v2i64_v8i64__3_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27179,8 +27881,7 @@ define void @s_shuffle_v2i64_v8i64__4_13() 
{ ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27261,8 +27962,7 @@ define void @s_shuffle_v2i64_v8i64__5_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27371,8 +28071,7 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s26 -; GFX942-NEXT: s_mov_b32 s15, s27 +; GFX942-NEXT: s_mov_b64 s[14:15], s[26:27] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -27485,8 +28184,7 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27499,18 +28197,43 @@ define void @s_shuffle_v2i64_v8i64__7_13() { } define void @s_shuffle_v2i64_v8i64__8_13() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_13: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s18 -; GFX9-NEXT: s_mov_b32 s11, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_13: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: 
; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_13: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27525,62 +28248,86 @@ define void @s_shuffle_v2i64_v8i64__9_13() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_13: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s6 +; 
GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <8 x i64> asm "; def $0", "=s"() + %vec1 = call <8 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> + call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) + ret void +} + +define void @s_shuffle_v2i64_v8i64__10_13() { +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_13: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_13: +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_13: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13] -; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13: +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_13: ; GFX942: ; 
%bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <8 x i64> asm "; def $0", "=s"() - %vec1 = call <8 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> - call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf) - ret void -} - -define void @s_shuffle_v2i64_v8i64__10_13() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_13: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -27625,8 +28372,7 @@ define void @s_shuffle_v2i64_v8i64__11_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27720,8 +28466,7 @@ define void @s_shuffle_v2i64_v8i64__13_13() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27770,8 +28515,7 @@ define void @s_shuffle_v2i64_v8i64__14_13() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -27818,8 +28562,7 @@ define void @s_shuffle_v2i64_v8i64__u_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27874,8 +28617,7 @@ define void @s_shuffle_v2i64_v8i64__0_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s24 -; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27934,10 +28676,8 @@ define void @s_shuffle_v2i64_v8i64__1_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -27992,8 +28732,7 @@ define void @s_shuffle_v2i64_v8i64__2_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s24 -; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28052,10 +28791,8 @@ define void @s_shuffle_v2i64_v8i64__3_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 
s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28136,8 +28873,7 @@ define void @s_shuffle_v2i64_v8i64__4_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s24 -; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28214,13 +28950,11 @@ define void @s_shuffle_v2i64_v8i64__5_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s24 -; GFX942-NEXT: s_mov_b32 s11, s25 +; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28329,8 +29063,7 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s28 -; GFX942-NEXT: s_mov_b32 s15, s29 +; GFX942-NEXT: s_mov_b64 s[14:15], s[28:29] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -28447,10 +29180,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s28 -; GFX942-NEXT: s_mov_b32 s11, s29 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[28:29] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28469,18 +29200,43 @@ 
define void @s_shuffle_v2i64_v8i64__7_14() { } define void @s_shuffle_v2i64_v8i64__8_14() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_14: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s20 -; GFX9-NEXT: s_mov_b32 s11, s21 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_14: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_14: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28525,10 +29281,8 @@ define void @s_shuffle_v2i64_v8i64__9_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: 
s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28541,18 +29295,43 @@ define void @s_shuffle_v2i64_v8i64__9_14() { } define void @s_shuffle_v2i64_v8i64__10_14() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_14: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s16 -; GFX9-NEXT: s_mov_b32 s11, s17 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_14: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_14: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_14: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; 
def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -28597,10 +29376,8 @@ define void @s_shuffle_v2i64_v8i64__11_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28649,8 +29426,7 @@ define void @s_shuffle_v2i64_v8i64__12_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28699,10 +29475,8 @@ define void @s_shuffle_v2i64_v8i64__13_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28751,8 +29525,7 @@ define void @s_shuffle_v2i64_v8i64__14_14() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -28855,8 +29628,7 @@ define void @s_shuffle_v2i64_v8i64__0_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s26 -; GFX942-NEXT: s_mov_b32 s11, s27 +; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -28908,20 +29680,29 @@ define void @s_shuffle_v2i64_v8i64__1_15() { ; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_15: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v0, s30, 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s16, s2 -; GFX942-NEXT: s_mov_b32 s17, s3 -; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] -; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19] +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s31, v0, 1 +; GFX942-NEXT: v_readlane_b32 s30, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() @@ -28973,8 +29754,7 @@ define void @s_shuffle_v2i64_v8i64__2_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s26 -; GFX942-NEXT: s_mov_b32 s11, s27 +; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -29027,16 +29807,15 @@ define void @s_shuffle_v2i64_v8i64__3_15() { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s20, s6 -; GFX942-NEXT: s_mov_b32 s21, s7 -; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] -; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19] +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -29117,8 +29896,7 @@ define void @s_shuffle_v2i64_v8i64__4_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:27] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s26 -; GFX942-NEXT: s_mov_b32 s11, s27 +; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -29193,16 +29971,15 @@ define void @s_shuffle_v2i64_v8i64__5_15() { ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:15] +; GFX942-NEXT: ; def s[8:23] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:27] +; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s24, s10 -; GFX942-NEXT: s_mov_b32 s25, s11 -; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25] -; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27] +; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19] +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -29311,8 +30088,7 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:31] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s30 -; GFX942-NEXT: s_mov_b32 s15, s31 +; GFX942-NEXT: s_mov_b64 s[14:15], s[30:31] ; 
GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -29418,30 +30194,19 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_15: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v0, s30, 0 -; GFX942-NEXT: v_writelane_b32 v0, s31, 1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:31] +; GFX942-NEXT: ; def s[4:19] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s28, s14 -; GFX942-NEXT: s_mov_b32 s29, s15 -; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29] -; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19] +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s31, v0, 1 -; GFX942-NEXT: v_readlane_b32 s30, v0, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() @@ -29451,18 +30216,43 @@ define void @s_shuffle_v2i64_v8i64__7_15() { } define void @s_shuffle_v2i64_v8i64__8_15() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_15: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:23] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s22 -; GFX9-NEXT: s_mov_b32 s11, s23 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_15: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_15: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29507,8 +30297,7 @@ define void @s_shuffle_v2i64_v8i64__9_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -29523,18 +30312,43 @@ define void @s_shuffle_v2i64_v8i64__9_15() { } define void @s_shuffle_v2i64_v8i64__10_15() { -; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_15: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART 
-; GFX9-NEXT: ; def s[4:19] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s18 -; GFX9-NEXT: s_mov_b32 s11, s19 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_15: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_15: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_15: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <8 x i64> asm "; def $0", "=s"() %vec1 = call <8 x i64> asm "; def $0", "=s"() %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> @@ -29579,8 +30393,7 @@ define void @s_shuffle_v2i64_v8i64__11_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART @@ -29631,8 +30444,7 @@ define void 
@s_shuffle_v2i64_v8i64__12_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -29681,8 +30493,7 @@ define void @s_shuffle_v2i64_v8i64__13_15() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] ; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll index 7f8f2dbbb09a1..54e700625d72c 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll @@ -88,8 +88,7 @@ define void @v_shuffle_v2p0_v2p0__1_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -144,8 +143,7 @@ define void @v_shuffle_v2p0_v2p0__3_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -204,10 +202,8 @@ define void @v_shuffle_v2p0_v2p0__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: 
v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -262,8 +258,7 @@ define void @v_shuffle_v2p0_v2p0__3_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -309,8 +304,7 @@ define void @v_shuffle_v2p0_v2p0__3_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -355,8 +349,7 @@ define void @v_shuffle_v2p0_v2p0__3_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -401,8 +394,7 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 
s[30:31] @@ -446,8 +438,7 @@ define void @v_shuffle_v2p0_v2p0__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -492,8 +483,7 @@ define void @v_shuffle_v2p0_v2p0__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -537,8 +527,7 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -660,8 +649,7 @@ define void @v_shuffle_v2p0_v2p0__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -794,8 +782,7 @@ define void @v_shuffle_v2p0_v2p0__1_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v4, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v2 -; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3] ; GFX942-NEXT: 
global_store_dwordx4 v4, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -900,8 +887,7 @@ define void @v_shuffle_v2p0_v2p0__0_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -956,8 +942,7 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:3] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1095,8 +1080,7 @@ define void @s_shuffle_v2p0_v2p0__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1154,8 +1138,7 @@ define void @s_shuffle_v2p0_v2p0__3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1213,10 +1196,8 @@ define void @s_shuffle_v2p0_v2p0__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; 
use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1270,8 +1251,7 @@ define void @s_shuffle_v2p0_v2p0__3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1320,10 +1300,8 @@ define void @s_shuffle_v2p0_v2p0__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1336,18 +1314,43 @@ define void @s_shuffle_v2p0_v2p0__3_2() { } define void @s_shuffle_v2p0_v2p0__3_3() { -; GFX9-LABEL: s_shuffle_v2p0_v2p0__3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v2p0__3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v2p0__3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> @@ -1388,8 +1391,7 @@ define void @s_shuffle_v2p0_v2p0__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1401,18 +1403,43 @@ define void @s_shuffle_v2p0_v2p0__u_0() { } define void @s_shuffle_v2p0_v2p0__0_0() { -; GFX9-LABEL: s_shuffle_v2p0_v2p0__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v2p0__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v2p0__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def 
s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1456,10 +1483,8 @@ define void @s_shuffle_v2p0_v2p0__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1503,8 +1528,7 @@ define void @s_shuffle_v2p0_v2p0__2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1596,18 +1620,43 @@ define void @s_shuffle_v2p0_v2p0__0_1() { } define void @s_shuffle_v2p0_v2p0__1_1() { -; GFX9-LABEL: s_shuffle_v2p0_v2p0__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v2p0__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v2p0__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -1741,8 +1790,7 @@ define void @s_shuffle_v2p0_v2p0__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1850,8 +1898,7 @@ define void @s_shuffle_v2p0_v2p0__0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -1905,8 +1952,7 @@ define void @s_shuffle_v2p0_v2p0__1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll index 27a6cf11c4cb1..9c770bf1c77cc 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll @@ -127,8 +127,7 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -223,8 +222,7 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -283,10 +281,8 @@ define void @v_shuffle_v2p0_v3p0__5_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: 
s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -341,8 +337,7 @@ define void @v_shuffle_v2p0_v3p0__5_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v8 -; GFX942-NEXT: v_mov_b32_e32 v1, v9 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -397,8 +392,7 @@ define void @v_shuffle_v2p0_v3p0__5_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v10 -; GFX942-NEXT: v_mov_b32_e32 v3, v11 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -447,10 +441,8 @@ define void @v_shuffle_v2p0_v3p0__5_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -495,8 +487,7 @@ define void @v_shuffle_v2p0_v3p0__5_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -541,8 +532,7 @@ define void @v_shuffle_v2p0_v3p0__5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: 
v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -587,8 +577,7 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -632,8 +621,7 @@ define void @v_shuffle_v2p0_v3p0__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -677,8 +665,7 @@ define void @v_shuffle_v2p0_v3p0__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -726,10 +713,8 @@ define void @v_shuffle_v2p0_v3p0__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; 
GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -773,8 +758,7 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -828,8 +812,7 @@ define void @v_shuffle_v2p0_v3p0__4_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -952,8 +935,7 @@ define void @v_shuffle_v2p0_v3p0__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -997,8 +979,7 @@ define void @v_shuffle_v2p0_v3p0__2_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1091,8 +1072,7 @@ define void @v_shuffle_v2p0_v3p0__4_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: v_mov_b32_e32 v9, v3 +; GFX942-NEXT: 
v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1176,8 +1156,7 @@ define void @v_shuffle_v2p0_v3p0__0_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1260,8 +1239,7 @@ define void @v_shuffle_v2p0_v3p0__2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1354,8 +1332,7 @@ define void @v_shuffle_v2p0_v3p0__4_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1489,8 +1466,7 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v4 -; GFX942-NEXT: v_mov_b32_e32 v1, v5 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1545,8 +1521,7 @@ define void @v_shuffle_v2p0_v3p0__4_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: 
v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1641,8 +1616,7 @@ define void @v_shuffle_v2p0_v3p0__0_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1697,8 +1671,7 @@ define void @v_shuffle_v2p0_v3p0__1_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1753,8 +1726,7 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1839,8 +1811,7 @@ define void @v_shuffle_v2p0_v3p0__4_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1935,8 +1906,7 @@ define void @v_shuffle_v2p0_v3p0__0_5(ptr addrspace(1) 
inreg %ptr) { ; GFX942-NEXT: ; def v[2:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1991,8 +1961,7 @@ define void @v_shuffle_v2p0_v3p0__1_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2047,8 +2016,7 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v4 -; GFX942-NEXT: v_mov_b32_e32 v9, v5 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2093,8 +2061,7 @@ define void @v_shuffle_v2p0_v3p0__3_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:5] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v6, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2232,8 +2199,7 @@ define void @s_shuffle_v2p0_v3p0__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2273,8 +2239,7 @@ define void @s_shuffle_v2p0_v3p0__2_u() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2332,8 +2297,7 @@ define void @s_shuffle_v2p0_v3p0__4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2374,8 +2338,7 @@ define void @s_shuffle_v2p0_v3p0__5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2430,11 +2393,11 @@ define void @s_shuffle_v2p0_v3p0__5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2484,8 +2447,7 @@ define void @s_shuffle_v2p0_v3p0__5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2539,10 +2501,8 @@ define void @s_shuffle_v2p0_v3p0__5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 
s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2587,10 +2547,8 @@ define void @s_shuffle_v2p0_v3p0__5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2603,18 +2561,43 @@ define void @s_shuffle_v2p0_v3p0__5_3() { } define void @s_shuffle_v2p0_v3p0__5_4() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__5_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__5_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__5_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART 
+; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -2659,10 +2642,8 @@ define void @s_shuffle_v2p0_v3p0__5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2707,8 +2688,7 @@ define void @s_shuffle_v2p0_v3p0__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2720,18 +2700,43 @@ define void @s_shuffle_v2p0_v3p0__u_0() { } define void @s_shuffle_v2p0_v3p0__0_0() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -2775,10 +2780,8 @@ define void @s_shuffle_v2p0_v3p0__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2822,10 +2825,8 @@ define void @s_shuffle_v2p0_v3p0__2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2869,8 +2870,7 @@ define void @s_shuffle_v2p0_v3p0__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -2924,13 +2924,11 @@ define void @s_shuffle_v2p0_v3p0__4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3023,18 +3021,43 @@ define void @s_shuffle_v2p0_v3p0__0_1() { } define void @s_shuffle_v2p0_v3p0__1_1() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3042,18 +3065,43 @@ define void @s_shuffle_v2p0_v3p0__1_1() { } define void @s_shuffle_v2p0_v3p0__2_1() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__2_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__2_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; 
GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3142,8 +3190,7 @@ define void @s_shuffle_v2p0_v3p0__4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3188,8 +3235,7 @@ define void @s_shuffle_v2p0_v3p0__u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3201,18 +3247,43 @@ define void @s_shuffle_v2p0_v3p0__u_2() { } define void @s_shuffle_v2p0_v3p0__0_2() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__0_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__0_2: +; GFX90A: ; 
%bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -3256,10 +3327,8 @@ define void @s_shuffle_v2p0_v3p0__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3307,10 +3376,8 @@ define void @s_shuffle_v2p0_v3p0__2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3354,8 +3421,7 @@ define void @s_shuffle_v2p0_v3p0__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3412,10 +3478,8 @@ define void @s_shuffle_v2p0_v3p0__4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3514,8 +3578,7 @@ define void @s_shuffle_v2p0_v3p0__1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3555,8 +3618,7 @@ define void @s_shuffle_v2p0_v3p0__2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3618,10 +3680,8 @@ define void @s_shuffle_v2p0_v3p0__4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3716,8 +3776,7 @@ define void @s_shuffle_v2p0_v3p0__0_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; 
GFX942-NEXT: ;;#ASMEND @@ -3771,8 +3830,7 @@ define void @s_shuffle_v2p0_v3p0__1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3826,8 +3884,7 @@ define void @s_shuffle_v2p0_v3p0__2_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3881,18 +3938,43 @@ define void @s_shuffle_v2p0_v3p0__3_4() { } define void @s_shuffle_v2p0_v3p0__4_4() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_4: 
+; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -3933,8 +4015,7 @@ define void @s_shuffle_v2p0_v3p0__u_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3988,8 +4069,7 @@ define void @s_shuffle_v2p0_v3p0__0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4047,10 +4127,8 @@ define void @s_shuffle_v2p0_v3p0__1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4104,10 +4182,8 @@ define void @s_shuffle_v2p0_v3p0__2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4120,18 +4196,43 @@ define void @s_shuffle_v2p0_v3p0__2_5() { } define void @s_shuffle_v2p0_v3p0__3_5() { -; GFX9-LABEL: s_shuffle_v2p0_v3p0__3_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v3p0__3_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v3p0__3_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> @@ -4176,10 +4277,8 @@ define void @s_shuffle_v2p0_v3p0__4_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND 
-; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll index ae31524ebaa7f..47634638d7674 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll @@ -166,8 +166,7 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -302,8 +301,7 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -362,10 +360,8 @@ define void @v_shuffle_v2p0_v4p0__7_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -420,8 +416,7 @@ define void 
@v_shuffle_v2p0_v4p0__7_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v10 -; GFX942-NEXT: v_mov_b32_e32 v1, v11 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -476,8 +471,7 @@ define void @v_shuffle_v2p0_v4p0__7_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v12 -; GFX942-NEXT: v_mov_b32_e32 v3, v13 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -532,8 +526,7 @@ define void @v_shuffle_v2p0_v4p0__7_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v14 -; GFX942-NEXT: v_mov_b32_e32 v5, v15 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15] ; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -582,10 +575,8 @@ define void @v_shuffle_v2p0_v4p0__7_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -630,8 +621,7 @@ define void @v_shuffle_v2p0_v4p0__7_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 
v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -676,8 +666,7 @@ define void @v_shuffle_v2p0_v4p0__7_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -722,8 +711,7 @@ define void @v_shuffle_v2p0_v4p0__7_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -768,8 +756,7 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -813,8 +800,7 @@ define void @v_shuffle_v2p0_v4p0__0_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -858,8 +844,7 @@ define void @v_shuffle_v2p0_v4p0__1_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: 
;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -903,8 +888,7 @@ define void @v_shuffle_v2p0_v4p0__2_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -952,10 +936,8 @@ define void @v_shuffle_v2p0_v4p0__3_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -999,8 +981,7 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v1 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1054,8 +1035,7 @@ define void @v_shuffle_v2p0_v4p0__5_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v10, 
v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1110,8 +1090,7 @@ define void @v_shuffle_v2p0_v4p0__6_0(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v0 -; GFX942-NEXT: v_mov_b32_e32 v9, v1 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1234,8 +1213,7 @@ define void @v_shuffle_v2p0_v4p0__1_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1279,8 +1257,7 @@ define void @v_shuffle_v2p0_v4p0__2_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v2 -; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1324,8 +1301,7 @@ define void @v_shuffle_v2p0_v4p0__3_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1418,8 +1394,7 @@ define void @v_shuffle_v2p0_v4p0__5_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v2 -; GFX942-NEXT: 
v_mov_b32_e32 v9, v3 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1474,8 +1449,7 @@ define void @v_shuffle_v2p0_v4p0__6_1(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v2 -; GFX942-NEXT: v_mov_b32_e32 v11, v3 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1559,8 +1533,7 @@ define void @v_shuffle_v2p0_v4p0__0_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1643,8 +1616,7 @@ define void @v_shuffle_v2p0_v4p0__2_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1688,8 +1660,7 @@ define void @v_shuffle_v2p0_v4p0__3_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1782,8 +1753,7 @@ define void @v_shuffle_v2p0_v4p0__5_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] 
; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v4 -; GFX942-NEXT: v_mov_b32_e32 v11, v5 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1838,8 +1808,7 @@ define void @v_shuffle_v2p0_v4p0__6_2(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v4 -; GFX942-NEXT: v_mov_b32_e32 v13, v5 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1923,8 +1892,7 @@ define void @v_shuffle_v2p0_v4p0__0_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -1968,8 +1936,7 @@ define void @v_shuffle_v2p0_v4p0__1_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2052,8 +2019,7 @@ define void @v_shuffle_v2p0_v4p0__3_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2146,8 
+2112,7 @@ define void @v_shuffle_v2p0_v4p0__5_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2202,8 +2167,7 @@ define void @v_shuffle_v2p0_v4p0__6_3(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v14, v6 -; GFX942-NEXT: v_mov_b32_e32 v15, v7 +; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2376,8 +2340,7 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v0, v6 -; GFX942-NEXT: v_mov_b32_e32 v1, v7 +; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2432,8 +2395,7 @@ define void @v_shuffle_v2p0_v4p0__5_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v0 -; GFX942-NEXT: v_mov_b32_e32 v5, v1 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2478,8 +2440,7 @@ define void @v_shuffle_v2p0_v4p0__6_4(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v0 -; GFX942-NEXT: v_mov_b32_e32 v7, v1 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1] ; GFX942-NEXT: 
global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2574,8 +2535,7 @@ define void @v_shuffle_v2p0_v4p0__0_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2630,8 +2590,7 @@ define void @v_shuffle_v2p0_v4p0__1_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2686,8 +2645,7 @@ define void @v_shuffle_v2p0_v4p0__2_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v8 -; GFX942-NEXT: v_mov_b32_e32 v7, v9 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2742,8 +2700,7 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v8, v6 -; GFX942-NEXT: v_mov_b32_e32 v9, v7 +; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2828,8 +2785,7 @@ define void @v_shuffle_v2p0_v4p0__5_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v2 -; GFX942-NEXT: 
v_mov_b32_e32 v5, v3 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2874,8 +2830,7 @@ define void @v_shuffle_v2p0_v4p0__6_5(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v2 -; GFX942-NEXT: v_mov_b32_e32 v7, v3 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -2970,8 +2925,7 @@ define void @v_shuffle_v2p0_v4p0__0_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3026,8 +2980,7 @@ define void @v_shuffle_v2p0_v4p0__1_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v8 -; GFX942-NEXT: v_mov_b32_e32 v5, v9 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3082,8 +3035,7 @@ define void @v_shuffle_v2p0_v4p0__2_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v10 -; GFX942-NEXT: v_mov_b32_e32 v7, v11 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3138,8 +3090,7 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND 
; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v10, v6 -; GFX942-NEXT: v_mov_b32_e32 v11, v7 +; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3184,8 +3135,7 @@ define void @v_shuffle_v2p0_v4p0__4_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v4 -; GFX942-NEXT: v_mov_b32_e32 v3, v5 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3270,8 +3220,7 @@ define void @v_shuffle_v2p0_v4p0__6_6(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v4 -; GFX942-NEXT: v_mov_b32_e32 v7, v5 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3366,8 +3315,7 @@ define void @v_shuffle_v2p0_v4p0__0_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[2:9] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v8 -; GFX942-NEXT: v_mov_b32_e32 v3, v9 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9] ; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3422,8 +3370,7 @@ define void @v_shuffle_v2p0_v4p0__1_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[4:11] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v10 -; GFX942-NEXT: v_mov_b32_e32 v5, v11 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11] ; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3478,8 +3425,7 @@ define void 
@v_shuffle_v2p0_v4p0__2_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[6:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v6, v12 -; GFX942-NEXT: v_mov_b32_e32 v7, v13 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13] ; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3534,8 +3480,7 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: v_mov_b32_e32 v12, v6 -; GFX942-NEXT: v_mov_b32_e32 v13, v7 +; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3580,8 +3525,7 @@ define void @v_shuffle_v2p0_v4p0__4_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v6 -; GFX942-NEXT: v_mov_b32_e32 v3, v7 +; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3626,8 +3570,7 @@ define void @v_shuffle_v2p0_v4p0__5_7(ptr addrspace(1) inreg %ptr) { ; GFX942-NEXT: ; def v[0:7] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_mov_b32_e32 v8, 0 -; GFX942-NEXT: v_mov_b32_e32 v4, v6 -; GFX942-NEXT: v_mov_b32_e32 v5, v7 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1] ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] @@ -3765,8 +3708,7 @@ define void @s_shuffle_v2p0_v4p0__1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3850,8 +3792,7 
@@ define void @s_shuffle_v2p0_v4p0__3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3909,8 +3850,7 @@ define void @s_shuffle_v2p0_v4p0__5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -3996,8 +3936,7 @@ define void @s_shuffle_v2p0_v4p0__7_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4056,10 +3995,8 @@ define void @s_shuffle_v2p0_v4p0__7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4113,8 +4050,7 @@ define void @s_shuffle_v2p0_v4p0__7_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4172,10 +4108,8 @@ define void @s_shuffle_v2p0_v4p0__7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 
s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4230,8 +4164,7 @@ define void @s_shuffle_v2p0_v4p0__7_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4280,10 +4213,8 @@ define void @s_shuffle_v2p0_v4p0__7_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4296,18 +4227,43 @@ define void @s_shuffle_v2p0_v4p0__7_4() { } define void @s_shuffle_v2p0_v4p0__7_5() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__7_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__7_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__7_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: 
;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4352,10 +4308,8 @@ define void @s_shuffle_v2p0_v4p0__7_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4368,18 +4322,43 @@ define void @s_shuffle_v2p0_v4p0__7_6() { } define void @s_shuffle_v2p0_v4p0__7_7() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -4420,8 +4399,7 @@ define void @s_shuffle_v2p0_v4p0__u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4433,18 +4411,43 @@ define void @s_shuffle_v2p0_v4p0__u_0() { } define void @s_shuffle_v2p0_v4p0__0_0() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4488,10 +4491,8 @@ define void @s_shuffle_v2p0_v4p0__1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4503,18 +4504,43 @@ define void @s_shuffle_v2p0_v4p0__1_0() { } define void @s_shuffle_v2p0_v4p0__2_0() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: 
s_mov_b32 s11, s5 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4558,10 +4584,8 @@ define void @s_shuffle_v2p0_v4p0__3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4605,8 +4629,7 @@ define void @s_shuffle_v2p0_v4p0__4_0() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4664,10 +4687,8 @@ define void @s_shuffle_v2p0_v4p0__5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4722,8 +4743,7 @@ define void @s_shuffle_v2p0_v4p0__6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -4816,18 +4836,43 @@ define void @s_shuffle_v2p0_v4p0__0_1() { } define void @s_shuffle_v2p0_v4p0__1_1() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4835,18 +4880,43 @@ define void @s_shuffle_v2p0_v4p0__1_1() { } define void @s_shuffle_v2p0_v4p0__2_1() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s6 -; GFX9-NEXT: s_mov_b32 s11, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 
s11, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4854,18 +4924,43 @@ define void @s_shuffle_v2p0_v4p0__2_1() { } define void @s_shuffle_v2p0_v4p0__3_1() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__3_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__3_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_1: +; GFX942: 
; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -4954,8 +5049,7 @@ define void @s_shuffle_v2p0_v4p0__5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5010,8 +5104,7 @@ define void @s_shuffle_v2p0_v4p0__6_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5056,8 +5149,7 @@ define void @s_shuffle_v2p0_v4p0__u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5069,18 +5161,43 @@ define void @s_shuffle_v2p0_v4p0__u_2() { } define void @s_shuffle_v2p0_v4p0__0_2() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: 
s_shuffle_v2p0_v4p0__0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5124,10 +5241,8 @@ define void @s_shuffle_v2p0_v4p0__1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5139,18 +5254,43 @@ define void @s_shuffle_v2p0_v4p0__1_2() { } define void @s_shuffle_v2p0_v4p0__2_2() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5194,10 +5334,8 @@ define void @s_shuffle_v2p0_v4p0__3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5241,8 +5379,7 @@ define void @s_shuffle_v2p0_v4p0__4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5299,10 +5436,8 @@ define void @s_shuffle_v2p0_v4p0__5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5357,8 +5492,7 @@ define void @s_shuffle_v2p0_v4p0__6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5411,18 +5545,43 @@ define void @s_shuffle_v2p0_v4p0__u_3() { } define void @s_shuffle_v2p0_v4p0__0_3() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5430,18 +5589,43 @@ define void @s_shuffle_v2p0_v4p0__0_3() { } define void @s_shuffle_v2p0_v4p0__1_3() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__1_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__1_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__1_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART 
+; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5489,18 +5673,43 @@ define void @s_shuffle_v2p0_v4p0__2_3() { } define void @s_shuffle_v2p0_v4p0__3_3() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; 
GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf) @@ -5590,8 +5799,7 @@ define void @s_shuffle_v2p0_v4p0__5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5646,8 +5854,7 @@ define void @s_shuffle_v2p0_v4p0__6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5746,8 +5953,7 @@ define void @s_shuffle_v2p0_v4p0__1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5831,8 +6037,7 @@ define void @s_shuffle_v2p0_v4p0__3_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5894,10 +6099,8 @@ define void 
@s_shuffle_v2p0_v4p0__5_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -5910,18 +6113,43 @@ define void @s_shuffle_v2p0_v4p0__5_4() { } define void @s_shuffle_v2p0_v4p0__6_4() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s4 -; GFX9-NEXT: s_mov_b32 s11, s5 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: 
;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6012,8 +6240,7 @@ define void @s_shuffle_v2p0_v4p0__0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6067,8 +6294,7 @@ define void @s_shuffle_v2p0_v4p0__1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6123,8 +6349,7 @@ define void @s_shuffle_v2p0_v4p0__2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6178,8 +6403,7 @@ define void @s_shuffle_v2p0_v4p0__3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6233,18 +6457,43 @@ define void @s_shuffle_v2p0_v4p0__4_5() { } define void @s_shuffle_v2p0_v4p0__5_5() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: 
s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6253,18 +6502,43 @@ define void @s_shuffle_v2p0_v4p0__5_5() { } define void @s_shuffle_v2p0_v4p0__6_5() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s6 -; GFX9-NEXT: s_mov_b32 s11, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6305,8 +6579,7 @@ define void @s_shuffle_v2p0_v4p0__u_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6360,8 +6633,7 @@ define void @s_shuffle_v2p0_v4p0__0_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6419,10 +6691,8 @@ define void @s_shuffle_v2p0_v4p0__1_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; 
GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6477,8 +6747,7 @@ define void @s_shuffle_v2p0_v4p0__2_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6536,10 +6805,8 @@ define void @s_shuffle_v2p0_v4p0__3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6552,18 +6819,43 @@ define void @s_shuffle_v2p0_v4p0__3_6() { } define void @s_shuffle_v2p0_v4p0__4_6() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__4_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__4_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__4_6: +; GFX90A: ; %bb.0: +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6608,10 +6900,8 @@ define void @s_shuffle_v2p0_v4p0__5_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6624,18 +6914,43 @@ define void @s_shuffle_v2p0_v4p0__5_6() { } define void @s_shuffle_v2p0_v4p0__6_6() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6726,8 +7041,7 @@ define void @s_shuffle_v2p0_v4p0__0_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6782,8 +7096,7 @@ define void @s_shuffle_v2p0_v4p0__1_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6838,8 +7151,7 @@ define void @s_shuffle_v2p0_v4p0__2_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6894,8 +7206,7 @@ define void @s_shuffle_v2p0_v4p0__3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:11] ; GFX942-NEXT: ;;#ASMEND @@ -6908,18 +7219,43 @@ define void @s_shuffle_v2p0_v4p0__3_7() { } define void @s_shuffle_v2p0_v4p0__4_7() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__4_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__4_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__4_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; 
GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> @@ -6928,18 +7264,43 @@ define void @s_shuffle_v2p0_v4p0__4_7() { } define void @s_shuffle_v2p0_v4p0__5_7() { -; GFX9-LABEL: s_shuffle_v2p0_v4p0__5_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[4:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s6 -; GFX9-NEXT: s_mov_b32 s9, s7 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v2p0_v4p0__5_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v2p0_v4p0__5_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> 
%vec0, <4 x ptr> %vec1, <2 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll index a15fc3212f474..261257533208b 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll @@ -2266,8 +2266,7 @@ define void @s_shuffle_v3i64_v2i64__1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2325,8 +2324,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2384,10 +2382,8 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2441,8 +2437,7 @@ define void @s_shuffle_v3i64_v2i64__3_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2491,10 +2486,8 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; 
GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2507,18 +2500,43 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() { } define void @s_shuffle_v3i64_v2i64__3_3_u() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = 
shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2572,10 +2590,8 @@ define void @s_shuffle_v3i64_v2i64__3_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2633,10 +2649,8 @@ define void @s_shuffle_v3i64_v2i64__3_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2689,12 +2703,9 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2707,20 +2718,48 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() { } define void @s_shuffle_v3i64_v2i64__3_3_3() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -2765,10 +2804,8 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2780,20 +2817,48 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() { } define void @s_shuffle_v3i64_v2i64__0_0_0() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() 
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -2841,12 +2906,9 @@ define void @s_shuffle_v3i64_v2i64__1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2894,10 +2956,8 @@ define void @s_shuffle_v3i64_v2i64__2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2958,12 +3018,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3021,10 +3078,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, 
s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3086,12 +3141,9 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3153,12 +3205,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3171,18 +3220,43 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() { } define void @s_shuffle_v3i64_v2i64__u_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -3190,18 +3264,43 @@ define void @s_shuffle_v3i64_v2i64__u_1_1() { } define void @s_shuffle_v3i64_v2i64__0_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__0_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__0_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__0_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -3209,20 +3308,48 @@ define void @s_shuffle_v3i64_v2i64__0_1_1() { } define void @s_shuffle_v3i64_v2i64__1_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -3230,18 +3357,43 @@ define void @s_shuffle_v3i64_v2i64__1_1_1() { } define void @s_shuffle_v3i64_v2i64__2_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: 
s_shuffle_v3i64_v2i64__2_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -3294,10 +3446,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3355,10 +3505,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3420,12 +3568,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3487,12 +3632,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3591,8 +3733,7 @@ define void @s_shuffle_v3i64_v2i64__1_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3658,12 +3799,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3712,10 +3850,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3773,12 +3909,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3836,10 +3969,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3852,18 +3983,43 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() { } define void @s_shuffle_v3i64_v2i64__u_3_3() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -3917,10 +4073,8 @@ define void @s_shuffle_v3i64_v2i64__0_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3978,10 +4132,8 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART 
; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3994,18 +4146,43 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() { } define void @s_shuffle_v3i64_v2i64__2_3_3() { -; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> @@ -4050,10 +4227,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4115,12 +4290,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4178,10 +4350,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4234,12 +4404,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll 
b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll index f15dd7d2772e5..3c546bf8a3130 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll @@ -4720,8 +4720,7 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4761,8 +4760,7 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4820,8 +4818,7 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4862,8 +4859,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4918,11 +4914,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: 
;;#ASMEND @@ -4972,8 +4968,7 @@ define void @s_shuffle_v3i64_v3i64__5_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5027,10 +5022,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5075,10 +5068,8 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5091,18 +5082,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() { } define void @s_shuffle_v3i64_v3i64__5_4_u() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: 
s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5147,10 +5163,8 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5212,12 +5226,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], 
s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5279,12 +5290,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5342,10 +5350,8 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5394,12 +5400,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5452,12 +5455,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: 
s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5470,20 +5470,48 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() { } define void @s_shuffle_v3i64_v3i64__5_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 
s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -5528,10 +5556,8 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5543,20 +5569,48 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() { } define void @s_shuffle_v3i64_v3i64__0_0_0() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: 
s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -5604,12 +5658,9 @@ define void @s_shuffle_v3i64_v3i64__1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5657,12 +5708,9 @@ define void @s_shuffle_v3i64_v3i64__2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5710,10 +5758,8 @@ 
define void @s_shuffle_v3i64_v3i64__3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5771,15 +5817,12 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5838,13 +5881,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5899,11 +5940,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5962,13 +6003,11 @@ define void @s_shuffle_v3i64_v3i64__5_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6022,12 +6061,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6086,13 +6122,11 @@ define void @s_shuffle_v3i64_v3i64__5_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6150,10 +6184,8 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6166,116 +6198,12 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() { } define void @s_shuffle_v3i64_v3i64__u_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__0_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__1_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__2_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__3_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__4_1_1() { -; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ 
-6283,17 +6211,12 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -6301,40 +6224,30 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() - %vec1 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v3i64__5_1_1() { -; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +define void @s_shuffle_v3i64_v3i64__0_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; 
def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -6342,15 +6255,12 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -6358,60 +6268,309 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() - %vec1 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v3i64__5_u_1() { -; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_1: +define void @s_shuffle_v3i64_v3i64__1_1_1() { +; 
GFX900-LABEL: s_shuffle_v3i64_v3i64__1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s12 -; GFX900-NEXT: s_mov_b32 s9, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_1: +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s12 -; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__2_1_1() 
{ +; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__3_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 
s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__4_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: 
s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %vec1 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__5_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %vec1 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__5_u_1() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -6425,11 +6584,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6488,13 +6647,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6552,12 +6709,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6577,217 +6731,344 @@ define void @s_shuffle_v3i64_v3i64__5_3_1() { ; GFX900-NEXT: ; def s[4:9] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:17] +; GFX900-NEXT: ; def s[12:17] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; 
GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:17] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %vec1 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__5_4_1() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %vec1 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__u_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_2_2: +; GFX942: ; 
%bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__0_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__1_2_2() { +; 
GFX900-LABEL: s_shuffle_v3i64_v3i64__1_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v3i64__2_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 
s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1: +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:17] +; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1: +; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() - %vec1 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void 
@s_shuffle_v3i64_v3i64__5_4_1() { -; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +define void @s_shuffle_v3i64_v3i64__3_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s12 -; GFX900-NEXT: s_mov_b32 s9, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s12 -; GFX90A-NEXT: s_mov_b32 s9, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1: +; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; 
GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %vec1 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__u_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__0_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__1_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: 
s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__2_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v3i64__3_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -6840,10 +7121,8 @@ define void @s_shuffle_v3i64_v3i64__4_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6897,10 +7176,8 @@ define void 
@s_shuffle_v3i64_v3i64__5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6950,8 +7227,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7009,12 +7285,9 @@ define void @s_shuffle_v3i64_v3i64__5_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7064,8 +7337,7 @@ define void @s_shuffle_v3i64_v3i64__5_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7119,10 +7391,8 @@ define void @s_shuffle_v3i64_v3i64__5_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7180,10 +7450,8 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7282,8 +7550,7 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7323,8 +7590,7 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7390,12 +7656,9 @@ define void @s_shuffle_v3i64_v3i64__4_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7444,12 +7707,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 
s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7494,10 +7754,8 @@ define void @s_shuffle_v3i64_v3i64__5_u_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7552,13 +7810,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7612,10 +7868,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7673,10 +7927,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:17] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 
-; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7725,12 +7977,9 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7743,18 +7992,43 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() { } define void @s_shuffle_v3i64_v3i64__u_4_4() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7808,10 +8082,8 @@ define void @s_shuffle_v3i64_v3i64__0_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7869,10 +8141,8 @@ define void @s_shuffle_v3i64_v3i64__1_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7930,10 +8200,8 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7946,18 
+8214,43 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() { } define void @s_shuffle_v3i64_v3i64__3_4_4() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7966,20 +8259,48 @@ define void @s_shuffle_v3i64_v3i64__3_4_4() { } define void @s_shuffle_v3i64_v3i64__4_4_4() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_4_4: -; GFX9: ; %bb.0: -; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -7988,20 +8309,48 @@ define void @s_shuffle_v3i64_v3i64__4_4_4() { } define void @s_shuffle_v3i64_v3i64__5_4_4() 
{ -; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8042,10 +8391,8 @@ define void 
@s_shuffle_v3i64_v3i64__5_u_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8104,13 +8451,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8164,10 +8509,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8225,12 +8568,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8279,12 +8619,9 @@ define void 
@s_shuffle_v3i64_v3i64__5_3_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8297,18 +8634,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_4() { } define void @s_shuffle_v3i64_v3i64__u_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] 
+; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8362,10 +8724,8 @@ define void @s_shuffle_v3i64_v3i64__0_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8423,10 +8783,8 @@ define void @s_shuffle_v3i64_v3i64__1_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8484,10 +8842,8 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8500,18 +8856,43 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() { } define void @s_shuffle_v3i64_v3i64__3_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8520,20 +8901,48 @@ define void @s_shuffle_v3i64_v3i64__3_5_5() { } define void @s_shuffle_v3i64_v3i64__4_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: 
;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8542,18 +8951,43 @@ define void @s_shuffle_v3i64_v3i64__4_5_5() { } define void @s_shuffle_v3i64_v3i64__5_u_5() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_u_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; 
GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> @@ -8607,10 +9041,8 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8668,10 +9100,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: 
; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8729,10 +9159,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8785,12 +9213,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8803,18 +9228,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() { } define void @s_shuffle_v3i64_v3i64__5_4_5() { -; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll index 6e156d2d4a2f5..7815761d29696 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll @@ -8016,8 +8016,7 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8057,8 +8056,7 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: 
s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8102,8 +8100,7 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8161,8 +8158,7 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8203,8 +8199,7 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8249,8 +8244,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8309,10 +8303,8 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8366,8 +8358,7 @@ define void 
@s_shuffle_v3i64_v4i64__7_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8425,10 +8416,8 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8482,10 +8471,8 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8534,10 +8521,8 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8550,18 +8535,43 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() { } define void @s_shuffle_v3i64_v4i64__7_5_u() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 
-; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -8606,10 +8616,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8654,10 +8662,8 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8716,13 +8722,11 @@ define void @s_shuffle_v3i64_v4i64__7_7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8781,13 +8785,11 @@ define void @s_shuffle_v3i64_v4i64__7_7_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8841,10 +8843,8 @@ define void @s_shuffle_v3i64_v4i64__7_7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8902,12 +8902,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_3() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8956,12 +8953,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9010,12 +9004,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9028,20 +9019,48 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() { } define void @s_shuffle_v3i64_v4i64__7_7_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; 
GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -9090,12 +9109,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 
s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9144,10 +9160,8 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9159,20 +9173,48 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() { } define void @s_shuffle_v3i64_v4i64__0_0_0() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 
+; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) @@ -9220,12 +9262,9 @@ define void @s_shuffle_v3i64_v4i64__1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9273,12 +9312,9 @@ define void @s_shuffle_v3i64_v4i64__2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9330,12 +9366,9 @@ define void @s_shuffle_v3i64_v4i64__3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; 
def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9383,10 +9416,8 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9444,15 +9475,12 @@ define void @s_shuffle_v3i64_v4i64__5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9511,13 +9539,11 @@ define void @s_shuffle_v3i64_v4i64__6_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9576,15 +9602,12 @@ define void @s_shuffle_v3i64_v4i64__7_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9639,13 +9662,11 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9704,15 +9725,12 @@ define void @s_shuffle_v3i64_v4i64__7_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9770,12 +9788,9 @@ define void @s_shuffle_v3i64_v4i64__7_2_0() { ; GFX942-NEXT: ;;#ASMSTART 
; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9833,12 +9848,9 @@ define void @s_shuffle_v3i64_v4i64__7_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9897,15 +9909,12 @@ define void @s_shuffle_v3i64_v4i64__7_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9963,10 +9972,8 @@ define void @s_shuffle_v3i64_v4i64__7_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -10028,12 +10035,9 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -10046,137 +10050,12 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() { } define void @s_shuffle_v3i64_v4i64__u_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__0_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> 
%shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__1_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__2_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__3_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - 
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__4_1_1() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__5_1_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1: +; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -10184,17 +10063,12 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -10202,42 +10076,30 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__6_1_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1: +define void @s_shuffle_v3i64_v4i64__0_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -10245,17 +10107,12 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; 
GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -10263,42 +10120,32 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_1_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1: +define void @s_shuffle_v3i64_v4i64__1_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -10306,17 +10153,14 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() { ; 
GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -10324,200 +10168,204 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_u_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1: +define void @s_shuffle_v3i64_v4i64__2_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] 
%vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_0_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1: +define void @s_shuffle_v3i64_v4i64__3_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__4_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = 
call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_2_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +define void @s_shuffle_v3i64_v4i64__5_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; 
GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -10526,61 +10374,57 @@ define void @s_shuffle_v3i64_v4i64__7_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_3_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1: +define void @s_shuffle_v3i64_v4i64__6_1_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; 
GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -10589,92 +10433,80 @@ define void @s_shuffle_v3i64_v4i64__7_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_4_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1: +define void @s_shuffle_v3i64_v4i64__7_1_1() { +; 
GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; 
GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_5_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1: +define void @s_shuffle_v3i64_v4i64__7_u_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -10692,7 +10524,7 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -10710,32 +10542,30 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; 
GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_6_1() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +define void @s_shuffle_v3i64_v4i64__7_0_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -10746,8 +10576,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -10755,7 +10585,7 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -10766,8 +10596,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -10775,190 +10605,131 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1: ; 
GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__u_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__0_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND 
-; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__1_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__2_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__3_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: 
;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__4_2_2() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +define void @s_shuffle_v3i64_v4i64__7_2_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; 
use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__5_2_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +define void @s_shuffle_v3i64_v4i64__7_3_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART 
-; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -10967,295 +10738,1418 @@ define void @s_shuffle_v3i64_v4i64__5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__6_2_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +define void @s_shuffle_v3i64_v4i64__7_4_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND 
; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; 
def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_2_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +define void @s_shuffle_v3i64_v4i64__7_5_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_6_1() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__u_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = 
shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__0_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__1_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__2_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 
s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__3_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: 
s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__4_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__5_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__6_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_2_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_u_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +; GFX90A: ; %bb.0: +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_0_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_1_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] 
+; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_3_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use 
s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_4_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: 
s_shuffle_v3i64_v4i64__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_5_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__7_6_2() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__u_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = 
shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__0_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__1_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__2_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 
+; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_u_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +define void @s_shuffle_v3i64_v4i64__3_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; 
GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> 
%vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_0_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +define void @s_shuffle_v3i64_v4i64__4_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2: +; 
GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_1_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +define void @s_shuffle_v3i64_v4i64__5_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11264,21 +12158,22 @@ define void @s_shuffle_v3i64_v4i64__7_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_3_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +define void @s_shuffle_v3i64_v4i64__6_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -11287,16 +12182,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; 
GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s10, s14 ; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -11305,16 +12200,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s10, s14 ; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11323,23 +12218,22 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm 
sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_4_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2: +define void @s_shuffle_v3i64_v4i64__7_3_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -11350,14 +12244,16 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -11368,14 +12264,16 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11384,59 +12282,58 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 
s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_5_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +define void @s_shuffle_v3i64_v4i64__7_u_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 
s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11445,59 +12342,61 @@ define void @s_shuffle_v3i64_v4i64__7_5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_6_2() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +define void @s_shuffle_v3i64_v4i64__7_0_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; 
GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11506,30 +12405,32 @@ define void @s_shuffle_v3i64_v4i64__7_6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> 
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__u_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +define void @s_shuffle_v3i64_v4i64__7_1_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11537,14 +12438,17 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11552,58 +12456,42 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: 
s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__0_3_3() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__1_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +define void @s_shuffle_v3i64_v4i64__7_2_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11611,16 
+12499,19 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11628,39 +12519,43 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void 
@s_shuffle_v3i64_v4i64__2_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3: +define void @s_shuffle_v3i64_v4i64__7_4_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s12 -; GFX900-NEXT: s_mov_b32 s9, s13 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11668,16 +12563,19 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s12 -; GFX90A-NEXT: s_mov_b32 s9, s13 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11685,94 +12583,102 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: 
;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__3_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3: +define void @s_shuffle_v3i64_v4i64__7_5_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: 
;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__4_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3: +define void @s_shuffle_v3i64_v4i64__7_6_3() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: 
s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11780,14 +12686,19 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11795,649 +12706,531 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x 
i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__5_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3: +define void @s_shuffle_v3i64_v4i64__u_4_4() { +; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:13] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__0_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 
s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__6_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3: +define void @s_shuffle_v3i64_v4i64__1_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND 
-; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_3_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3: +define void @s_shuffle_v3i64_v4i64__2_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 
s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3: -; GFX942: ; %bb.0: -; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_u_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3: 
+define void @s_shuffle_v3i64_v4i64__3_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = 
call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_0_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3: +define void @s_shuffle_v3i64_v4i64__4_4_4() { +; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:13] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__5_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; 
GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_1_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3: +define void @s_shuffle_v3i64_v4i64__6_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: 
;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", 
"=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_2_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3: +define void @s_shuffle_v3i64_v4i64__7_4_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3: +; 
GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_4_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3: +define void @s_shuffle_v3i64_v4i64__7_u_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_5_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3: +define void @s_shuffle_v3i64_v4i64__7_0_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; 
GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_6_3() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3: +define void @s_shuffle_v3i64_v4i64__7_1_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 
s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -12446,831 +13239,783 @@ define void @s_shuffle_v3i64_v4i64__7_6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__u_4_4() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__0_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4: +define void @s_shuffle_v3i64_v4i64__7_2_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__1_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4: +define void 
@s_shuffle_v3i64_v4i64__7_3_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call 
void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__2_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4: +define void @s_shuffle_v3i64_v4i64__7_5_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call 
<4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__3_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4: +define void @s_shuffle_v3i64_v4i64__7_6_4() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: 
;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__4_4_4() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__5_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4: +define void @s_shuffle_v3i64_v4i64__u_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: 
s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__6_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4: +define void @s_shuffle_v3i64_v4i64__0_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: 
; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_4_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4: +define void 
@s_shuffle_v3i64_v4i64__1_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, 
s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_u_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4: +define void @s_shuffle_v3i64_v4i64__2_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 
+; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_0_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4: +define void @s_shuffle_v3i64_v4i64__3_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 
s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_1_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4: +define void @s_shuffle_v3i64_v4i64__4_5_5() { +; 
GFX900-LABEL: s_shuffle_v3i64_v4i64__4_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void 
asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_2_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4: +define void @s_shuffle_v3i64_v4i64__5_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_3_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4: +define void @s_shuffle_v3i64_v4i64__6_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: 
;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_5_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4: +define void @s_shuffle_v3i64_v4i64__7_5_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s6 -; GFX900-NEXT: s_mov_b32 s11, s7 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4: +; 
GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s6 -; GFX90A-NEXT: s_mov_b32 s11, s7 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_6_4() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4: +define void @s_shuffle_v3i64_v4i64__7_u_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def 
s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x 
i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__u_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__0_5_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_5_5: +define void @s_shuffle_v3i64_v4i64__7_0_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -13278,17 +14023,19 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_5_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; 
GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -13296,68 +14043,68 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__1_5_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_5_5: +define void @s_shuffle_v3i64_v4i64__7_1_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; 
GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_5_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -13366,231 +14113,145 @@ define void @s_shuffle_v3i64_v4i64__1_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def 
$0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__2_5_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_5_5: +define void @s_shuffle_v3i64_v4i64__7_2_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_5_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; 
GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__3_5_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_5_5: +define void @s_shuffle_v3i64_v4i64__7_3_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_5_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_5: ; 
GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__4_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND 
-; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__5_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__6_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__7_5_5() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_u_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_5: +define void @s_shuffle_v3i64_v4i64__7_4_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -13598,6 +14259,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -13605,7 +14268,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -13613,6 +14276,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -13620,41 +14285,37 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5: +; 
GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_0_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_5: +define void @s_shuffle_v3i64_v4i64__7_6_5() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -13662,19 +14323,16 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; 
GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -13682,35 +14340,73 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) + ret void +} + +define void @s_shuffle_v3i64_v4i64__u_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_1_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_5: +define void @s_shuffle_v3i64_v4i64__0_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -13719,16 +14415,16 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: 
s_shuffle_v3i64_v4i64__7_1_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -13737,16 +14433,16 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -13755,360 +14451,300 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_2_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_5: +define void @s_shuffle_v3i64_v4i64__1_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: 
;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_3_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_5: +define void @s_shuffle_v3i64_v4i64__2_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: 
s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__7_4_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_5: +define void @s_shuffle_v3i64_v4i64__3_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: 
s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret 
void } -define void @s_shuffle_v3i64_v4i64__7_6_5() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_5: +define void @s_shuffle_v3i64_v4i64__4_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_5: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__u_6_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__0_6_6() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_6_6: +define void @s_shuffle_v3i64_v4i64__5_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_6_6: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_6_6: ; 
GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__1_6_6() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_6_6: +define void @s_shuffle_v3i64_v4i64__6_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; 
GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -14116,17 +14752,14 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_6_6: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -14134,42 +14767,34 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> 
%vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__2_6_6() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_6_6: +define void @s_shuffle_v3i64_v4i64__7_6_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_6: ; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -14177,17 +14802,14 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_6_6: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -14195,190 +14817,63 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) ret void } -define void @s_shuffle_v3i64_v4i64__3_6_6() { -; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_6_6: +define void @s_shuffle_v3i64_v4i64__7_u_6() { +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_6_6: +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: 
;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6: +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__4_6_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__5_6_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__6_6_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__7_6_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf) - ret void -} - -define void @s_shuffle_v3i64_v4i64__7_u_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_u_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14432,10 +14927,8 @@ define void @s_shuffle_v3i64_v4i64__7_0_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14493,10 +14986,8 @@ define void @s_shuffle_v3i64_v4i64__7_1_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14554,10 +15045,8 @@ define void @s_shuffle_v3i64_v4i64__7_2_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14615,10 +15104,8 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; 
GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14671,12 +15158,9 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14689,18 +15173,43 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() { } define void @s_shuffle_v3i64_v4i64__7_5_6() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: 
;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -14745,10 +15254,8 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14806,10 +15313,8 @@ define void @s_shuffle_v3i64_v4i64__0_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14871,12 +15376,9 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14934,12 +15436,9 @@ define void @s_shuffle_v3i64_v4i64__2_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15001,12 +15500,9 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15019,20 +15515,48 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() { } define void @s_shuffle_v3i64_v4i64__4_7_7() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 
s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15081,12 +15605,9 @@ define void @s_shuffle_v3i64_v4i64__5_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15139,12 +15660,9 @@ define void @s_shuffle_v3i64_v4i64__6_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15193,10 +15711,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15258,12 +15774,9 @@ define void @s_shuffle_v3i64_v4i64__7_0_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15321,10 +15834,8 @@ define void @s_shuffle_v3i64_v4i64__7_1_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15386,12 +15897,9 @@ define void @s_shuffle_v3i64_v4i64__7_2_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15449,12 +15957,9 @@ define void @s_shuffle_v3i64_v4i64__7_3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15507,12 +16012,9 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15525,20 +16027,48 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() { } define void @s_shuffle_v3i64_v4i64__7_5_7() { -; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; 
GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> @@ -15587,12 +16117,9 @@ define void @s_shuffle_v3i64_v4i64__7_6_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 
s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll index fe132493ce536..fdba1e81224d0 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll @@ -2266,8 +2266,7 @@ define void @s_shuffle_v3p0_v2p0__1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2325,8 +2324,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2384,10 +2382,8 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2441,8 +2437,7 @@ define void @s_shuffle_v3p0_v2p0__3_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2491,10 +2486,8 @@ define void 
@s_shuffle_v3p0_v2p0__3_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2507,18 +2500,43 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() { } define void @s_shuffle_v3p0_v2p0__3_3_u() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; 
GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2572,10 +2590,8 @@ define void @s_shuffle_v3p0_v2p0__3_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2633,10 +2649,8 @@ define void @s_shuffle_v3p0_v2p0__3_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2689,12 +2703,9 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2707,20 +2718,48 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() { } define void @s_shuffle_v3p0_v2p0__3_3_3() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: 
s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -2765,10 +2804,8 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: 
s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2780,20 +2817,48 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() { } define void @s_shuffle_v3p0_v2p0__0_0_0() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; 
use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -2841,12 +2906,9 @@ define void @s_shuffle_v3p0_v2p0__1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2894,10 +2956,8 @@ define void @s_shuffle_v3p0_v2p0__2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -2958,12 +3018,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3021,10 +3078,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3086,12 +3141,9 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3153,12 +3205,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3171,18 +3220,43 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() { } define void @s_shuffle_v3p0_v2p0__u_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -3190,18 +3264,43 @@ define void @s_shuffle_v3p0_v2p0__u_1_1() { } define void @s_shuffle_v3p0_v2p0__0_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__0_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__0_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 
+; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__0_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -3209,20 +3308,48 @@ define void @s_shuffle_v3p0_v2p0__0_1_1() { } define void @s_shuffle_v3p0_v2p0__1_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -3230,18 +3357,43 @@ define void @s_shuffle_v3p0_v2p0__1_1_1() { } define void @s_shuffle_v3p0_v2p0__2_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND 
+; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -3294,10 +3446,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3355,10 +3505,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3420,12 +3568,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3487,12 +3632,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3591,8 +3733,7 @@ define void @s_shuffle_v3p0_v2p0__1_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3658,12 +3799,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3712,10 +3850,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: 
; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3773,12 +3909,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3836,10 +3969,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3852,18 +3983,43 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() { } define void @s_shuffle_v3p0_v2p0__u_3_3() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def 
s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -3917,10 +4073,8 @@ define void @s_shuffle_v3p0_v2p0__0_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3978,10 +4132,8 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -3994,18 +4146,43 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() { } define void @s_shuffle_v3p0_v2p0__2_3_3() { -; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> @@ -4050,10 +4227,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4115,12 +4290,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4178,10 +4350,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4234,12 +4404,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll 
b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll index b6f4e3091b61f..422bcb5b4414a 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll @@ -4720,8 +4720,7 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4761,8 +4760,7 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4820,8 +4818,7 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4862,8 +4859,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -4918,11 +4914,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ 
-4972,8 +4968,7 @@ define void @s_shuffle_v3p0_v3p0__5_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5027,10 +5022,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5075,10 +5068,8 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5091,18 +5082,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() { } define void @s_shuffle_v3p0_v3p0__5_4_u() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5147,10 +5163,8 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5212,12 +5226,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5279,12 +5290,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5342,10 +5350,8 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5394,12 +5400,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5452,12 +5455,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 
s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5470,20 +5470,48 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() { } define void @s_shuffle_v3p0_v3p0__5_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; 
GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -5528,10 +5556,8 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5543,20 +5569,48 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() { } define void @s_shuffle_v3p0_v3p0__0_0_0() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: 
s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -5604,12 +5658,9 @@ define void @s_shuffle_v3p0_v3p0__1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5657,12 +5708,9 @@ define void @s_shuffle_v3p0_v3p0__2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5710,10 +5758,8 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5771,15 +5817,12 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5838,13 +5881,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5899,11 +5940,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -5962,13 +6003,11 @@ define 
void @s_shuffle_v3p0_v3p0__5_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6022,12 +6061,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6086,13 +6122,11 @@ define void @s_shuffle_v3p0_v3p0__5_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6150,10 +6184,8 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6166,116 +6198,12 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() { } define void @s_shuffle_v3p0_v3p0__u_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__0_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__1_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", 
"{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__2_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__3_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__4_1_1() { -; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -6283,17 +6211,12 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 
s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -6301,40 +6224,30 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %vec1 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v3p0__5_1_1() { -; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +define void @s_shuffle_v3p0_v3p0__0_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: 
s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -6342,15 +6255,12 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -6358,60 +6268,309 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %vec1 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v3p0__5_u_1() { -; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_1: +define void @s_shuffle_v3p0_v3p0__1_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; 
def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s12 -; GFX900-NEXT: s_mov_b32 s9, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_1: +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s12 -; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__2_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; 
def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__3_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def 
s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__4_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1: +; GFX942: ; 
%bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %vec1 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__5_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: 
s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %vec1 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__5_u_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -6425,11 +6584,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6488,13 +6647,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6552,12 +6709,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6577,217 +6731,344 @@ define void @s_shuffle_v3p0_v3p0__5_3_1() { ; GFX900-NEXT: ; def s[4:9] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:17] +; GFX900-NEXT: ; def s[12:17] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:17] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: 
s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %vec1 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__5_4_1() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:9] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART 
+; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:5] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %vec1 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__u_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; 
GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__0_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__1_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; 
GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v3p0__2_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_1: +; GFX90A-LABEL: 
s_shuffle_v3p0_v3p0__2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:17] +; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1: +; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:9] +; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %vec1 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v3p0__5_4_1() { -; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +define void @s_shuffle_v3p0_v3p0__3_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def 
s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s12 -; GFX900-NEXT: s_mov_b32 s9, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:9] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s12 -; GFX90A-NEXT: s_mov_b32 s9, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1: +; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:13] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:5] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %vec1 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void 
@s_shuffle_v3p0_v3p0__u_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__0_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__1_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__2_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_2_2: -; 
GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v3p0__3_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -6840,10 +7121,8 @@ define void @s_shuffle_v3p0_v3p0__4_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6897,10 +7176,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], 
s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -6950,8 +7227,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7009,12 +7285,9 @@ define void @s_shuffle_v3p0_v3p0__5_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7064,8 +7337,7 @@ define void @s_shuffle_v3p0_v3p0__5_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7119,10 +7391,8 @@ define void @s_shuffle_v3p0_v3p0__5_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7180,10 +7450,8 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: 
s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7282,8 +7550,7 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7323,8 +7590,7 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7390,12 +7656,9 @@ define void @s_shuffle_v3p0_v3p0__4_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7444,12 +7707,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: 
;;#ASMEND @@ -7494,10 +7754,8 @@ define void @s_shuffle_v3p0_v3p0__5_u_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7552,13 +7810,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7612,10 +7868,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7673,10 +7927,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[12:17] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7725,12 +7977,9 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7743,18 +7992,43 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() { } define void @s_shuffle_v3p0_v3p0__u_4_4() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7808,10 +8082,8 @@ define void @s_shuffle_v3p0_v3p0__0_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7869,10 +8141,8 @@ define void @s_shuffle_v3p0_v3p0__1_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7930,10 +8200,8 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -7946,18 +8214,43 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() { } define void @s_shuffle_v3p0_v3p0__3_4_4() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: 
;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7966,20 +8259,48 @@ define void @s_shuffle_v3p0_v3p0__3_4_4() { } define void @s_shuffle_v3p0_v3p0__4_4_4() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: 
s_shuffle_v3p0_v3p0__4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -7988,20 +8309,48 @@ define void @s_shuffle_v3p0_v3p0__4_4_4() { } define void @s_shuffle_v3p0_v3p0__5_4_4() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: 
;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8042,10 +8391,8 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ 
-8104,13 +8451,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8164,10 +8509,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8225,12 +8568,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8279,12 +8619,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 
s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8297,18 +8634,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() { } define void @s_shuffle_v3p0_v3p0__u_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8362,10 +8724,8 @@ define void @s_shuffle_v3p0_v3p0__0_5_5() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8423,10 +8783,8 @@ define void @s_shuffle_v3p0_v3p0__1_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8484,10 +8842,8 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8500,18 +8856,43 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() { } define void @s_shuffle_v3p0_v3p0__3_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; 
GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8520,20 +8901,48 @@ define void @s_shuffle_v3p0_v3p0__3_5_5() { } define void @s_shuffle_v3p0_v3p0__4_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8542,18 +8951,43 @@ define void @s_shuffle_v3p0_v3p0__4_5_5() { } define void @s_shuffle_v3p0_v3p0__5_u_5() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_u_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> @@ -8607,10 +9041,8 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8668,10 +9100,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8729,10 +9159,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8785,12 +9213,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8803,18 +9228,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() { } define void @s_shuffle_v3p0_v3p0__5_4_5() { -; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; 
GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll index b03066e66cf66..707633944e851 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll @@ -8016,8 +8016,7 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8057,8 +8056,7 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8102,8 +8100,7 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8161,8 +8158,7 @@ define void 
@s_shuffle_v3p0_v4p0__5_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8203,8 +8199,7 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8249,8 +8244,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8309,10 +8303,8 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8366,8 +8358,7 @@ define void @s_shuffle_v3p0_v4p0__7_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8425,10 +8416,8 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, 
s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8482,10 +8471,8 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8534,10 +8521,8 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8550,18 +8535,43 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() { } define void @s_shuffle_v3p0_v4p0__7_5_u() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: 
s_shuffle_v3p0_v4p0__7_5_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -8606,10 +8616,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8654,10 +8662,8 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8716,13 +8722,11 @@ define void @s_shuffle_v3p0_v4p0__7_7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: 
s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8781,13 +8785,11 @@ define void @s_shuffle_v3p0_v4p0__7_7_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8841,10 +8843,8 @@ define void @s_shuffle_v3p0_v4p0__7_7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8902,12 +8902,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -8956,12 +8953,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] 
; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9010,12 +9004,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9028,20 +9019,48 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() { } define void @s_shuffle_v3p0_v4p0__7_7_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 
s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -9090,12 +9109,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9144,10 +9160,8 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9159,20 +9173,48 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() { } define void @s_shuffle_v3p0_v4p0__0_0_0() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", 
"=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) @@ -9220,12 +9262,9 @@ define void @s_shuffle_v3p0_v4p0__1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9273,12 +9312,9 @@ define void @s_shuffle_v3p0_v4p0__2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9330,12 +9366,9 @@ define void @s_shuffle_v3p0_v4p0__3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9383,10 +9416,8 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 
s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9444,15 +9475,12 @@ define void @s_shuffle_v3p0_v4p0__5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9511,13 +9539,11 @@ define void @s_shuffle_v3p0_v4p0__6_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9576,15 +9602,12 @@ define void @s_shuffle_v3p0_v4p0__7_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 
s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9639,13 +9662,11 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9704,15 +9725,12 @@ define void @s_shuffle_v3p0_v4p0__7_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9770,12 +9788,9 @@ define void @s_shuffle_v3p0_v4p0__7_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9833,12 +9848,9 @@ define void @s_shuffle_v3p0_v4p0__7_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9897,15 +9909,12 @@ define void @s_shuffle_v3p0_v4p0__7_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -9963,10 +9972,8 @@ define void @s_shuffle_v3p0_v4p0__7_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -10028,12 +10035,9 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -10046,137 +10050,12 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() { } define void @s_shuffle_v3p0_v4p0__u_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__0_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__1_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect 
"; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__2_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__3_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__4_1_1() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", 
"{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__5_1_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1: +; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -10184,17 +10063,12 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -10202,42 +10076,30 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; 
def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__6_1_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1: +define void @s_shuffle_v3p0_v4p0__0_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -10245,17 +10107,12 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -10263,42 +10120,32 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: 
s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_1_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1: +define void @s_shuffle_v3p0_v4p0__1_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART @@ -10306,17 +10153,14 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s12, s10 ; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART @@ -10324,200 +10168,204 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() { ; GFX90A-NEXT: ;;#ASMEND 
; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_u_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1: +define void @s_shuffle_v3p0_v4p0__2_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_0_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1: +define void @s_shuffle_v3p0_v4p0__3_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: 
s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__4_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_2_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +define void @s_shuffle_v3p0_v4p0__5_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; 
GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -10526,61 +10374,57 @@ define void @s_shuffle_v3p0_v4p0__7_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_3_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1: +define void @s_shuffle_v3p0_v4p0__6_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; 
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -10589,92 +10433,80 @@ define void @s_shuffle_v3p0_v4p0__7_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_4_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1: +define void @s_shuffle_v3p0_v4p0__7_1_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1: ; GFX90A: ; 
%bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_5_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1: +define void @s_shuffle_v3p0_v4p0__7_u_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -10692,7 +10524,7 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -10710,32 +10542,30 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_6_1() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +define void @s_shuffle_v3p0_v4p0__7_0_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -10746,8 +10576,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s12 
-; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -10755,7 +10585,7 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -10766,8 +10596,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -10775,190 +10605,131 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 
x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__u_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__0_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__1_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", 
"{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__2_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__3_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__4_2_2() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +define void @s_shuffle_v3p0_v4p0__7_2_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +; GFX900: ; %bb.0: +; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x 
ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__5_2_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +define void @s_shuffle_v3p0_v4p0__7_3_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -10967,295 +10738,1418 @@ define void @s_shuffle_v3p0_v4p0__5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__6_2_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +define void @s_shuffle_v3p0_v4p0__7_4_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, 
s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_2_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +define void @s_shuffle_v3p0_v4p0__7_5_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: 
s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_6_1() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__u_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__0_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 
s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__1_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x 
ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__2_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__3_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] 
+; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__4_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; 
GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__5_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2: +; GFX942: ; %bb.0: +; 
GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__6_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 
s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_2_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use 
s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_u_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x 
ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_0_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x 
ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_1_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_3_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_4_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_5_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 
s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__7_6_2() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: ;;#ASMSTART 
+; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__u_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__0_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: 
s_shuffle_v3p0_v4p0__0_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__1_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], 
s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__2_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_u_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +define void @s_shuffle_v3p0_v4p0__3_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: 
;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_0_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +define void @s_shuffle_v3p0_v4p0__4_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +; GFX90A-LABEL: 
s_shuffle_v3p0_v4p0__4_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_1_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +define void @s_shuffle_v3p0_v4p0__5_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3: ; GFX900: 
; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11264,21 +12158,22 @@ define void @s_shuffle_v3p0_v4p0__7_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; 
GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_3_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +define void @s_shuffle_v3p0_v4p0__6_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -11287,16 +12182,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s10, s14 ; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -11305,16 +12200,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s10, s14 ; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11323,23 
+12218,22 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_4_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +define void @s_shuffle_v3p0_v4p0__7_3_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -11350,14 +12244,16 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -11368,14 +12264,16 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: 
s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11384,59 +12282,58 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_5_2() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +define void @s_shuffle_v3p0_v4p0__7_u_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; 
GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11445,59 +12342,61 @@ define void @s_shuffle_v3p0_v4p0__7_5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_6_2() 
{ -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +define void @s_shuffle_v3p0_v4p0__7_0_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -11506,30 +12405,32 @@ define void 
@s_shuffle_v3p0_v4p0__7_6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__u_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +define void @s_shuffle_v3p0_v4p0__7_1_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11537,14 +12438,17 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: 
s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11552,58 +12456,42 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__0_3_3() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__1_3_3() { -; 
GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +define void @s_shuffle_v3p0_v4p0__7_2_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11611,16 +12499,19 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11628,39 +12519,43 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s6 -; 
GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__2_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3: +define void @s_shuffle_v3p0_v4p0__7_4_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s12 -; GFX900-NEXT: s_mov_b32 s9, s13 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11668,16 +12563,19 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s12 -; GFX90A-NEXT: s_mov_b32 s9, s13 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11685,94 +12583,102 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__3_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3: +define void @s_shuffle_v3p0_v4p0__7_5_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s14 ; GFX900-NEXT: s_mov_b32 s9, s15 -; 
GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> 
%vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__4_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3: +define void @s_shuffle_v3p0_v4p0__7_6_3() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -11780,14 +12686,19 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -11795,649 +12706,531 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__5_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3: +define void @s_shuffle_v3p0_v4p0__u_4_4() { +; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:13] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__0_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: 
s_shuffle_v3p0_v4p0__5_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__6_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3: +define void @s_shuffle_v3p0_v4p0__1_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; 
def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> 
%shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_3_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3: +define void @s_shuffle_v3p0_v4p0__2_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3: -; GFX942: ; %bb.0: -; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; 
GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_u_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3: +define void @s_shuffle_v3p0_v4p0__3_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4: ; GFX942: ; 
%bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_0_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3: +define void @s_shuffle_v3p0_v4p0__4_4_4() { +; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:13] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__5_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; 
GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = 
shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_1_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3: +define void @s_shuffle_v3p0_v4p0__6_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; 
GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_2_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3: +define void @s_shuffle_v3p0_v4p0__7_4_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def 
s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_4_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3: +define void @s_shuffle_v3p0_v4p0__7_u_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; 
GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x 
i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_5_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3: +define void @s_shuffle_v3p0_v4p0__7_0_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; 
GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_6_3() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3: +define void @s_shuffle_v3p0_v4p0__7_1_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; 
GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -12446,831 +13239,783 @@ define void @s_shuffle_v3p0_v4p0__7_6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__u_4_4() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", 
"{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__0_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4: +define void @s_shuffle_v3p0_v4p0__7_2_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; 
GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__1_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4: +define void @s_shuffle_v3p0_v4p0__7_3_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__2_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4: +define void @s_shuffle_v3p0_v4p0__7_5_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__3_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4: +define void @s_shuffle_v3p0_v4p0__7_6_4() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4: ; GFX942: ; 
%bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__4_4_4() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__5_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4: +define void @s_shuffle_v3p0_v4p0__u_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__6_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4: +define void @s_shuffle_v3p0_v4p0__0_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 
s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = 
call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_4_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4: +define void @s_shuffle_v3p0_v4p0__1_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4: +; GFX942-LABEL: 
s_shuffle_v3p0_v4p0__1_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_u_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4: +define void @s_shuffle_v3p0_v4p0__2_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: 
;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_0_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4: +define void @s_shuffle_v3p0_v4p0__3_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] 
; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x 
ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_1_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4: +define void @s_shuffle_v3p0_v4p0__4_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; 
use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_2_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4: +define void @s_shuffle_v3p0_v4p0__5_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4: +; GFX942-LABEL: 
s_shuffle_v3p0_v4p0__5_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_3_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4: +define void @s_shuffle_v3p0_v4p0__6_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; 
GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_5_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4: +define void @s_shuffle_v3p0_v4p0__7_5_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s6 -; GFX900-NEXT: s_mov_b32 s11, s7 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: 
s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s6 -; GFX90A-NEXT: s_mov_b32 s11, s7 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void 
@s_shuffle_v3p0_v4p0__7_6_4() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4: +define void @s_shuffle_v3p0_v4p0__7_u_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__u_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__0_5_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_5_5: +define void @s_shuffle_v3p0_v4p0__7_0_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -13278,17 +14023,19 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_5_5: +; GFX90A-LABEL: 
s_shuffle_v3p0_v4p0__7_0_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -13296,68 +14043,68 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__1_5_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_5_5: +define void @s_shuffle_v3p0_v4p0__7_1_5() { +; GFX900-LABEL: 
s_shuffle_v3p0_v4p0__7_1_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_5_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_5_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -13366,231 +14113,145 @@ define void @s_shuffle_v3p0_v4p0__1_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: 
s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__2_5_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_5_5: +define void @s_shuffle_v3p0_v4p0__7_2_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_5_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; 
GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__3_5_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_5_5: +define void @s_shuffle_v3p0_v4p0__7_3_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; 
GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_5_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__4_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__5_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__6_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void 
@s_shuffle_v3p0_v4p0__7_5_5() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_u_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_5: +define void @s_shuffle_v3p0_v4p0__7_4_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -13598,6 +14259,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -13605,7 +14268,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -13613,6 +14276,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; 
GFX90A-NEXT: ;;#ASMSTART @@ -13620,41 +14285,37 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_0_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_5: +define void @s_shuffle_v3p0_v4p0__7_6_5() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -13662,19 +14323,16 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_5: +; GFX90A-LABEL: 
s_shuffle_v3p0_v4p0__7_6_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -13682,35 +14340,73 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v3p0_v4p0__u_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_1_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_5: +define void @s_shuffle_v3p0_v4p0__0_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -13719,16 +14415,16 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 
s13, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -13737,16 +14433,16 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -13755,360 +14451,300 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_2_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_5: +define void @s_shuffle_v3p0_v4p0__1_6_6() { +; 
GFX900-LABEL: s_shuffle_v3p0_v4p0__1_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 
-; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_3_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_5: +define void @s_shuffle_v3p0_v4p0__2_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: 
s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_4_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_5: +define void @s_shuffle_v3p0_v4p0__3_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, 
s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, 
<3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__7_6_5() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_5: +define void @s_shuffle_v3p0_v4p0__4_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_5: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, 
s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__u_6_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__0_6_6() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_6_6: +define void @s_shuffle_v3p0_v4p0__5_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_6_6: +; 
GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__1_6_6() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_6_6: +define void @s_shuffle_v3p0_v4p0__6_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -14116,17 +14752,14 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_6_6: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -14134,42 +14767,34 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> 
%vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__2_6_6() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_6_6: +define void @s_shuffle_v3p0_v4p0__7_6_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_6: ; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -14177,17 +14802,14 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_6_6: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -14195,190 +14817,63 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) ret void } -define void @s_shuffle_v3p0_v4p0__3_6_6() { -; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_6_6: +define void @s_shuffle_v3p0_v4p0__7_u_6() { +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:13] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_6_6: +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:13] ; GFX90A-NEXT: ;;#ASMEND ; 
GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6: +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__4_6_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__5_6_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__6_6_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__7_6_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> - call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v3p0_v4p0__7_u_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_u_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND 
-; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14432,10 +14927,8 @@ define void @s_shuffle_v3p0_v4p0__7_0_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14493,10 +14986,8 @@ define void @s_shuffle_v3p0_v4p0__7_1_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14554,10 +15045,8 @@ define void @s_shuffle_v3p0_v4p0__7_2_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14615,10 +15104,8 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 
s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14671,12 +15158,9 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14689,18 +15173,43 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() { } define void @s_shuffle_v3p0_v4p0__7_5_6() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: 
s_shuffle_v3p0_v4p0__7_5_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -14745,10 +15254,8 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14806,10 +15313,8 @@ define void @s_shuffle_v3p0_v4p0__0_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14871,12 +15376,9 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -14934,12 
+15436,9 @@ define void @s_shuffle_v3p0_v4p0__2_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15001,12 +15500,9 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15019,20 +15515,48 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() { } define void @s_shuffle_v3p0_v4p0__4_7_7() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; 
GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15081,12 +15605,9 @@ define void @s_shuffle_v3p0_v4p0__5_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15139,12 +15660,9 @@ define void @s_shuffle_v3p0_v4p0__6_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 
s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15193,10 +15711,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15258,12 +15774,9 @@ define void @s_shuffle_v3p0_v4p0__7_0_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15321,10 +15834,8 @@ define void @s_shuffle_v3p0_v4p0__7_1_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15386,12 +15897,9 @@ define void @s_shuffle_v3p0_v4p0__7_2_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 
-; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15449,12 +15957,9 @@ define void @s_shuffle_v3p0_v4p0__7_3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15507,12 +16012,9 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND @@ -15525,20 +16027,48 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() { } define void @s_shuffle_v3p0_v4p0__7_5_7() { -; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:13] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> @@ -15587,12 +16117,9 @@ define void @s_shuffle_v3p0_v4p0__7_6_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 
s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:13] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll index ac7d9557ce765..3dc06c075b039 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll @@ -3734,8 +3734,7 @@ define void @s_shuffle_v4i64_v2i64__1_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3793,8 +3792,7 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3852,10 +3850,8 @@ define void @s_shuffle_v4i64_v2i64__3_0_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3909,8 +3905,7 @@ define void @s_shuffle_v4i64_v2i64__3_1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3959,10 +3954,8 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND 
-; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3975,18 +3968,43 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() { } define void @s_shuffle_v4i64_v2i64__3_3_u_u() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; 
def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -3995,21 +4013,52 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_u() { } define void @s_shuffle_v4i64_v2i64__3_3_0_u() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 
s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4063,10 +4112,8 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4079,20 +4126,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() { } define void @s_shuffle_v4i64_v2i64__3_3_2_u() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; 
GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4101,20 +4176,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_u() { } define void @s_shuffle_v4i64_v2i64__3_3_3_u() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4172,12 +4275,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4190,23 +4290,57 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() { } define void @s_shuffle_v4i64_v2i64__3_3_3_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: 
s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -4259,14 +4393,10 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4279,44 +4409,103 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() { } define void @s_shuffle_v4i64_v2i64__3_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x i64> asm "; def $0", "=s"() - %vec1 = call <2 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v2i64__u_0_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: 
s_shuffle_v4i64_v2i64__3_3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x i64> asm "; def $0", "=s"() + %vec1 = call <2 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v2i64__u_0_0_0() { +; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def 
s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4324,22 +4513,53 @@ define void @s_shuffle_v4i64_v2i64__u_0_0_0() { } define void @s_shuffle_v4i64_v2i64__0_0_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; 
GFX900-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: s_mov_b32 s14, s8 +; GFX900-NEXT: s_mov_b32 s15, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: s_mov_b32 s14, s8 +; GFX90A-NEXT: s_mov_b32 s15, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4347,22 +4567,53 @@ define void @s_shuffle_v4i64_v2i64__0_0_0_0() { } define void @s_shuffle_v4i64_v2i64__1_0_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x 
i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4370,20 +4621,48 @@ define void @s_shuffle_v4i64_v2i64__1_0_0_0() { } define void @s_shuffle_v4i64_v2i64__2_0_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4440,12 +4719,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4503,10 +4779,8 @@ define void @s_shuffle_v4i64_v2i64__3_u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4568,12 +4842,9 @@ define void @s_shuffle_v4i64_v2i64__3_1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4635,12 +4906,9 @@ define void @s_shuffle_v4i64_v2i64__3_2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; 
GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4653,57 +4921,91 @@ define void @s_shuffle_v4i64_v2i64__3_2_0_0() { } define void @s_shuffle_v4i64_v2i64__3_3_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x i64> asm "; def $0", "=s"() - %vec1 = call <2 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v2i64__3_3_u_0() { -; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0: +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ; def s[12:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ; def s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; 
GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0: +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ; def s[12:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:7] +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x i64> asm "; def $0", "=s"() + %vec1 = call <2 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v2i64__3_3_u_0() { +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:7] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 @@ -4723,10 +5025,8 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4788,12 +5088,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4855,12 +5152,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ 
-4873,20 +5167,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() { } define void @s_shuffle_v4i64_v2i64__u_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x 
i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4894,20 +5216,48 @@ define void @s_shuffle_v4i64_v2i64__u_1_1_1() { } define void @s_shuffle_v4i64_v2i64__0_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND 
+; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4915,22 +5265,53 @@ define void @s_shuffle_v4i64_v2i64__0_1_1_1() { } define void @s_shuffle_v4i64_v2i64__1_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1: +; GFX942: ; %bb.0: +; 
GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -4938,20 +5319,48 @@ define void @s_shuffle_v4i64_v2i64__1_1_1_1() { } define void @s_shuffle_v4i64_v2i64__2_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: 
; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -5008,12 +5417,9 @@ define void @s_shuffle_v4i64_v2i64__3_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5071,10 +5477,8 @@ define void @s_shuffle_v4i64_v2i64__3_u_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5136,12 +5540,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; 
GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5203,12 +5604,9 @@ define void @s_shuffle_v4i64_v2i64__3_2_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5221,46 +5619,111 @@ define void @s_shuffle_v4i64_v2i64__3_2_1_1() { } define void @s_shuffle_v4i64_v2i64__3_3_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x i64> asm "; def $0", "=s"() - %vec1 = call <2 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] 
+; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x i64> asm "; def $0", "=s"() + %vec1 = call <2 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} define void @s_shuffle_v4i64_v2i64__3_3_u_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: 
;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5269,21 +5732,52 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_1() { } define void @s_shuffle_v4i64_v2i64__3_3_0_1() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: 
;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5341,12 +5835,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5445,8 +5936,7 @@ define void @s_shuffle_v4i64_v2i64__1_2_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5472,22 +5962,53 @@ define void @s_shuffle_v4i64_v2i64__2_2_2_2() { } define void @s_shuffle_v4i64_v2i64__3_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2: +; GFX90A: ; 
%bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5496,20 +6017,48 @@ define void @s_shuffle_v4i64_v2i64__3_2_2_2() { } define void @s_shuffle_v4i64_v2i64__3_u_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: 
s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5567,12 +6116,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5585,23 +6131,57 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() { } define void @s_shuffle_v4i64_v2i64__3_1_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5610,22 +6190,53 @@ define void @s_shuffle_v4i64_v2i64__3_1_2_2() { } define void @s_shuffle_v4i64_v2i64__3_3_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5674,12 +6285,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5741,12 +6349,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5812,14 +6417,10 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, 
s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5832,20 +6433,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() { } define void @s_shuffle_v4i64_v2i64__u_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; 
GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5854,23 +6483,57 @@ define void @s_shuffle_v4i64_v2i64__u_3_3_3() { } define void @s_shuffle_v4i64_v2i64__0_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; 
GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -5928,12 +6591,9 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5946,78 +6606,134 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() { } define void @s_shuffle_v4i64_v2i64__2_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = 
call <2 x i64> asm "; def $0", "=s"() - %vec1 = call <2 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v2i64__3_u_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x i64> asm "; def $0", "=s"() - %vec1 = call <2 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v2i64__3_0_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3: +; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ; def s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ; def s[8:11] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:7] +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x i64> asm "; def $0", "=s"() + %vec1 = call <2 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v2i64__3_u_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART 
+; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x i64> asm "; def $0", "=s"() + %vec1 = call <2 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v2i64__3_0_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:7] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 @@ -6039,12 +6755,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: 
s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -6057,23 +6770,57 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() { } define void @s_shuffle_v4i64_v2i64__3_1_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -6082,22 +6829,53 @@ define void @s_shuffle_v4i64_v2i64__3_1_3_3() { } define void @s_shuffle_v4i64_v2i64__3_2_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: 
s_shuffle_v4i64_v2i64__3_2_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -6106,20 +6884,48 @@ define void @s_shuffle_v4i64_v2i64__3_2_3_3() { } define void @s_shuffle_v4i64_v2i64__3_3_u_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; 
GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -6128,23 +6934,57 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_3() { } define void @s_shuffle_v4i64_v2i64__3_3_0_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> @@ -6202,12 +7042,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: 
s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -6220,20 +7057,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() { } define void @s_shuffle_v4i64_v2i64__3_3_2_3() { -; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x i64> asm "; def $0", "=s"() %vec1 = call <2 x i64> asm "; def $0", "=s"() %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll index 8dd4a40d00680..1a295a4c6e8ed 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll @@ -7818,8 +7818,7 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -7859,8 +7858,7 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -7918,8 +7916,7 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -7960,8 +7957,7 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8016,11 +8012,11 @@ define void 
@s_shuffle_v4i64_v3i64__5_0_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8070,8 +8066,7 @@ define void @s_shuffle_v4i64_v3i64__5_1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8125,10 +8120,8 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8173,10 +8166,8 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8189,18 +8180,43 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() { } define void @s_shuffle_v4i64_v3i64__5_4_u_u() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 
s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8245,10 +8261,8 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8310,12 +8324,9 @@ define void 
@s_shuffle_v4i64_v3i64__5_5_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8377,12 +8388,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8440,10 +8448,8 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8492,12 +8498,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; 
GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8550,12 +8553,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8568,20 +8568,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() { } define void @s_shuffle_v4i64_v3i64__5_5_5_u() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; 
GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8639,12 +8667,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8706,12 +8731,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8773,12 +8795,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_2() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8835,14 +8854,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8895,14 +8910,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8915,22 +8926,53 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() { } define void @s_shuffle_v4i64_v3i64__5_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5: 
-; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 
s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -8979,12 +9021,9 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8996,22 +9035,53 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() { } define void @s_shuffle_v4i64_v3i64__0_0_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: s_mov_b32 s14, s8 +; GFX900-NEXT: s_mov_b32 s15, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: s_mov_b32 s14, s8 +; GFX90A-NEXT: s_mov_b32 s15, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -9063,14 +9133,10 @@ define void @s_shuffle_v4i64_v3i64__1_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9122,14 +9188,10 @@ define void @s_shuffle_v4i64_v3i64__2_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: 
s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9181,12 +9243,9 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9248,17 +9307,13 @@ define void @s_shuffle_v4i64_v3i64__4_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9321,15 +9376,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9388,13 +9440,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9457,15 +9507,12 @@ define void @s_shuffle_v4i64_v3i64__5_1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9527,14 +9574,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; 
GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9597,15 +9640,12 @@ define void @s_shuffle_v4i64_v3i64__5_3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9667,12 +9707,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9738,14 +9775,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], 
s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9807,12 +9840,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9878,14 +9908,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9951,14 +9977,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: 
s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10020,12 +10042,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10091,14 +10110,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10111,126 +10126,12 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() { } define void @s_shuffle_v4i64_v3i64__u_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = 
shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v3i64__0_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v3i64__1_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v3i64__2_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 
-; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v3i64__3_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v3i64__4_1_1_1() { -; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: +; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; GFX900-NEXT: s_mov_b32 s14, s10 @@ -10240,13 +10141,273 @@ define void @s_shuffle_v4i64_v3i64__4_1_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: 
;;#ASMSTART +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v3i64__0_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v3i64__1_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; 
GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v3i64__2_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> 
%vec0, <3 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v3i64__3_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v3i64__4_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:9] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s6 @@ -10269,12 +10430,9 @@ define void @s_shuffle_v4i64_v3i64__4_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10332,12 +10490,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10396,13 +10551,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10465,15 +10618,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10531,14 +10681,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10601,15 +10747,12 @@ define void @s_shuffle_v4i64_v3i64__5_3_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART 
; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10671,12 +10814,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10742,14 +10882,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10811,12 +10947,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10882,14 +11015,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10951,14 +11080,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11020,12 +11145,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11091,14 +11213,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11111,20 +11229,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() { } define void @s_shuffle_v4i64_v3i64__u_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -11132,20 +11278,48 @@ define void @s_shuffle_v4i64_v3i64__u_2_2_2() { } define void @s_shuffle_v4i64_v3i64__0_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -11153,22 +11327,53 @@ define void @s_shuffle_v4i64_v3i64__0_2_2_2() { } define void @s_shuffle_v4i64_v3i64__1_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: 
s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -11176,22 +11381,53 @@ define void @s_shuffle_v4i64_v3i64__1_2_2_2() { } define void @s_shuffle_v4i64_v3i64__2_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use 
s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -11199,20 +11435,48 @@ define void @s_shuffle_v4i64_v3i64__2_2_2_2() { } define void @s_shuffle_v4i64_v3i64__3_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -11269,12 +11533,9 @@ define void @s_shuffle_v4i64_v3i64__4_2_2_2() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11332,12 +11593,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11391,10 +11649,8 @@ define void @s_shuffle_v4i64_v3i64__5_u_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11456,14 +11712,10 @@ define void @s_shuffle_v4i64_v3i64__5_0_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11517,10 +11769,8 @@ define void @s_shuffle_v4i64_v3i64__5_1_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11578,12 +11828,9 @@ define void @s_shuffle_v4i64_v3i64__5_3_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11645,12 +11892,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11712,12 +11956,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, 
s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11779,12 +12020,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11846,14 +12084,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11919,14 +12153,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 
-; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11992,12 +12222,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12063,14 +12290,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12169,8 +12392,7 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12210,8 +12432,7 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12281,14 +12502,10 @@ define void @s_shuffle_v4i64_v3i64__4_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12341,14 +12558,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12397,12 +12610,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; 
GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12465,15 +12675,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12531,12 +12738,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12598,14 +12802,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:21] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s20 -; GFX942-NEXT: s_mov_b32 s9, s21 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s16 -; GFX942-NEXT: s_mov_b32 s13, s17 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12658,14 +12858,10 @@ define void @s_shuffle_v4i64_v3i64__5_4_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12722,14 +12918,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12782,12 +12974,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12853,14 
+13042,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:21] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s20 -; GFX942-NEXT: s_mov_b32 s9, s21 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12926,14 +13111,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:21] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s20 -; GFX942-NEXT: s_mov_b32 s9, s21 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12995,12 +13176,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13057,14 +13235,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() 
{ ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13077,20 +13251,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() { } define void @s_shuffle_v4i64_v3i64__u_4_4_4() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13144,12 +13346,9 @@ define void @s_shuffle_v4i64_v3i64__0_4_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13211,12 +13410,9 @@ define void @s_shuffle_v4i64_v3i64__1_4_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13278,12 +13474,9 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13296,20 +13489,48 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() { } define void @s_shuffle_v4i64_v3i64__3_4_4_4() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4: +; GFX942: ; %bb.0: +; 
GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13318,22 +13539,53 @@ define void @s_shuffle_v4i64_v3i64__3_4_4_4() { } define void @s_shuffle_v4i64_v3i64__4_4_4_4() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; 
GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13342,22 +13594,53 @@ define void @s_shuffle_v4i64_v3i64__4_4_4_4() { } define void @s_shuffle_v4i64_v3i64__5_4_4_4() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -13402,12 +13685,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13466,15 +13746,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13532,12 +13809,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13599,12 +13873,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13657,14 +13928,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13717,14 +13984,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13773,12 +14036,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13840,12 +14100,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13907,12 +14164,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13974,12 +14228,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14028,14 +14279,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14048,20 +14295,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() { } define void @s_shuffle_v4i64_v3i64__u_5_5_5() { -; GFX9-LABEL: 
s_shuffle_v4i64_v3i64__u_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14119,12 +14394,9 @@ define void 
@s_shuffle_v4i64_v3i64__0_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14186,12 +14458,9 @@ define void @s_shuffle_v4i64_v3i64__1_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14253,12 +14522,9 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14271,20 +14537,48 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() { } define void @s_shuffle_v4i64_v3i64__3_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; 
GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14293,22 +14587,53 @@ define void @s_shuffle_v4i64_v3i64__3_5_5_5() { } define void @s_shuffle_v4i64_v3i64__4_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = 
call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14317,20 +14642,48 @@ define void @s_shuffle_v4i64_v3i64__4_5_5_5() { } define void @s_shuffle_v4i64_v3i64__5_u_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14388,12 +14741,9 @@ define void @s_shuffle_v4i64_v3i64__5_0_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14455,12 +14805,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14522,12 +14869,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14584,14 +14928,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14604,20 +14944,48 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() { } define void @s_shuffle_v4i64_v3i64__5_4_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; 
use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x i64> asm "; def $0", "=s"() %vec1 = call <3 x i64> asm "; def $0", "=s"() %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> @@ -14666,12 +15034,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14737,14 +15102,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14810,14 +15171,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_5() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14879,12 +15236,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14937,14 +15291,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15001,14 +15351,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll index ea9ef2f1ac94a..d026e3b08b171 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll @@ -13289,8 +13289,7 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13330,8 +13329,7 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13375,8 +13373,7 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13434,8 +13431,7 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13476,8 +13472,7 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13522,8 +13517,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13582,10 +13576,8 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13639,8 +13631,7 @@ define void @s_shuffle_v4i64_v4i64__7_1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13698,10 +13689,8 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13755,10 +13744,8 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13807,10 +13794,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13823,18 +13808,43 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() { } define void @s_shuffle_v4i64_v4i64__7_5_u_u() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -13879,10 +13889,8 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13927,10 +13935,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13989,13 +13995,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14054,13 +14058,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14114,10 +14116,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14175,12 +14175,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14229,12 +14226,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14283,12 +14277,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14301,20 +14292,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() { } define void @s_shuffle_v4i64_v4i64__7_7_6_u() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14363,12 +14382,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14434,14 +14450,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; 
GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14507,14 +14519,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14580,14 +14588,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14649,12 +14653,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: 
s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14711,14 +14712,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14771,14 +14768,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14835,14 +14828,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 
-; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14855,22 +14844,53 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() { } define void @s_shuffle_v4i64_v4i64__7_7_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -14919,12 +14939,9 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14936,22 +14953,53 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() { } define void @s_shuffle_v4i64_v4i64__0_0_0_0() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; 
GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: s_mov_b32 s14, s8 +; GFX900-NEXT: s_mov_b32 s15, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: s_mov_b32 s14, s8 +; GFX90A-NEXT: s_mov_b32 s15, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -15003,14 +15051,10 @@ define void @s_shuffle_v4i64_v4i64__1_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15062,14 +15106,10 @@ define void @s_shuffle_v4i64_v4i64__2_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15125,14 +15165,10 @@ define void @s_shuffle_v4i64_v4i64__3_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15184,12 +15220,9 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 
s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15251,17 +15284,13 @@ define void @s_shuffle_v4i64_v4i64__5_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15324,15 +15353,12 @@ define void @s_shuffle_v4i64_v4i64__6_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15395,17 +15421,13 @@ define void @s_shuffle_v4i64_v4i64__7_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; 
GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15464,15 +15486,12 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15535,17 +15554,13 @@ define void @s_shuffle_v4i64_v4i64__7_1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15611,14 +15626,10 @@ define void @s_shuffle_v4i64_v4i64__7_2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 
s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15680,14 +15691,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15750,17 +15757,13 @@ define void @s_shuffle_v4i64_v4i64__7_4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15822,12 +15825,9 @@ define void @s_shuffle_v4i64_v4i64__7_5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15893,14 +15893,10 @@ define void @s_shuffle_v4i64_v4i64__7_6_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15963,15 +15959,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16030,13 +16023,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16099,15 +16090,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16169,14 +16157,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16238,14 +16222,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: 
s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16304,15 +16284,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16375,15 +16352,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16445,12 +16419,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16463,20 +16434,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() { } define void @s_shuffle_v4i64_v4i64__u_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; 
GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -16484,89 +16483,210 @@ define void @s_shuffle_v4i64_v4i64__u_1_1_1() { } define void @s_shuffle_v4i64_v4i64__0_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__1_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__2_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + 
+define void @s_shuffle_v4i64_v4i64__1_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__2_1_1_1() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } define void @s_shuffle_v4i64_v4i64__3_1_1_1() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; 
GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -16574,20 +16694,48 @@ define void @s_shuffle_v4i64_v4i64__3_1_1_1() { } define void @s_shuffle_v4i64_v4i64__4_1_1_1() { -; 
GFX9-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) @@ -16644,12 
+16792,9 @@ define void @s_shuffle_v4i64_v4i64__5_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16711,12 +16856,9 @@ define void @s_shuffle_v4i64_v4i64__6_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16778,12 +16920,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16842,15 +16981,12 @@ define void @s_shuffle_v4i64_v4i64__7_u_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16913,17 +17049,13 @@ define void @s_shuffle_v4i64_v4i64__7_0_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16985,14 +17117,10 @@ define void @s_shuffle_v4i64_v4i64__7_2_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17054,14 +17182,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; 
GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17124,17 +17248,13 @@ define void @s_shuffle_v4i64_v4i64__7_4_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17196,12 +17316,9 @@ define void @s_shuffle_v4i64_v4i64__7_5_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17267,14 +17384,10 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, 
s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17337,15 +17450,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17404,13 +17514,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17473,15 +17581,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND 
-; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17539,14 +17644,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17604,14 +17705,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17670,15 +17767,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17741,15 +17835,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17811,12 +17902,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17829,149 +17917,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() { } define void @s_shuffle_v4i64_v4i64__u_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, 
s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__0_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__1_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__2_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__3_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__4_2_2_2() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret 
void -} - -define void @s_shuffle_v4i64_v4i64__5_2_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: +; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -17981,17 +17932,12 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -18001,42 +17947,31 @@ define void @s_shuffle_v4i64_v4i64__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__6_2_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: +define void @s_shuffle_v4i64_v4i64__0_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -18046,15 +17981,12 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -18064,42 +17996,31 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_2_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: +define void @s_shuffle_v4i64_v4i64__1_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -18111,15 +18032,12 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -18131,44 +18049,36 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: 
s_shuffle_v4i64_v4i64__7_2_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_u_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: +define void @s_shuffle_v4i64_v4i64__2_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18176,17 +18086,16 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: 
s_shuffle_v4i64_v4i64__7_u_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18194,115 +18103,88 @@ define void @s_shuffle_v4i64_v4i64__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_0_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: +define void @s_shuffle_v4i64_v4i64__3_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2: ; GFX900: ; 
%bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def 
s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_1_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: +define void @s_shuffle_v4i64_v4i64__4_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18310,17 +18192,14 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: 
;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18328,32 +18207,26 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_3_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: +define void @s_shuffle_v4i64_v4i64__5_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -18362,10 +18235,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s6 +; 
GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18373,7 +18246,7 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -18382,10 +18255,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18393,7 +18266,7 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18402,25 +18275,22 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] 
%vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_4_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: +define void @s_shuffle_v4i64_v4i64__6_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -18429,10 +18299,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18440,7 +18308,7 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -18449,10 +18317,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18460,7 +18326,7 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: 
s_shuffle_v4i64_v4i64__7_4_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18469,65 +18335,62 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_5_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: +define void @s_shuffle_v4i64_v4i64__7_2_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, 
s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18536,37 +18399,32 @@ define void @s_shuffle_v4i64_v4i64__7_5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x 
i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_6_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: +define void @s_shuffle_v4i64_v4i64__7_u_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18574,19 +18432,17 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18594,7 +18450,7 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2: ; GFX942: ; %bb.0: ; 
GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18603,61 +18459,65 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_2_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: +define void @s_shuffle_v4i64_v4i64__7_0_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s12 -; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: ; GFX90A: ; %bb.0: ; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s12 -; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18666,35 +18526,33 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_u_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: +define void 
@s_shuffle_v4i64_v4i64__7_1_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18702,17 +18560,17 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18720,7 +18578,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18729,61 +18587,61 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: 
s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_0_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: +define void @s_shuffle_v4i64_v4i64__7_3_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; 
GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18792,67 +18650,62 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_1_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: +define void @s_shuffle_v4i64_v4i64__7_4_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, 
s17 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18861,39 +18714,34 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def 
$0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_3_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: +define void @s_shuffle_v4i64_v4i64__7_5_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: s_mov_b32 s14, s16 ; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART @@ -18901,19 +18749,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: s_mov_b32 s14, s16 ; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART @@ -18921,7 
+18769,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18930,203 +18778,182 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_4_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: +define void @s_shuffle_v4i64_v4i64__7_6_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 
s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_5_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: +define void @s_shuffle_v4i64_v4i64__7_7_2_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 
s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_6_2() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: +define void @s_shuffle_v4i64_v4i64__7_7_u_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 
s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19135,197 +18962,58 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call 
void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__u_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__0_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__1_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: 
s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__2_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__3_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__4_3_3_3() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: 
s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__5_3_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: +define void @s_shuffle_v4i64_v4i64__7_7_0_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; 
GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19334,61 +19022,63 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__6_3_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: +define void @s_shuffle_v4i64_v4i64__7_7_1_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: 
s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19397,65 +19087,63 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = 
shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_3_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: +define void @s_shuffle_v4i64_v4i64__7_7_3_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: 
s_shuffle_v4i64_v4i64__7_3_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19464,86 +19152,87 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_u_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: +define void @s_shuffle_v4i64_v4i64__7_7_4_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_0_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: +define void @s_shuffle_v4i64_v4i64__7_7_5_2() { +; GFX900-LABEL: 
s_shuffle_v4i64_v4i64__7_7_5_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -19554,18 +19243,16 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -19576,83 +19263,81 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; 
GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_1_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: +define void @s_shuffle_v4i64_v4i64__7_7_6_2() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; 
GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19661,35 +19346,29 @@ define void @s_shuffle_v4i64_v4i64__7_1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_2_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: +define void @s_shuffle_v4i64_v4i64__u_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART 
; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19697,19 +19376,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19717,46 +19391,33 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_4_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: +define void @s_shuffle_v4i64_v4i64__0_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19764,19 +19425,14 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19784,113 +19440,89 @@ define void 
@s_shuffle_v4i64_v4i64__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_5_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: +define void @s_shuffle_v4i64_v4i64__1_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 
s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x 
i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_6_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: +define void @s_shuffle_v4i64_v4i64__2_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19898,19 +19530,16 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19918,44 +19547,36 @@ define void @s_shuffle_v4i64_v4i64__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3: ; GFX942: ; 
%bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_3_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: +define void @s_shuffle_v4i64_v4i64__3_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19963,17 +19584,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19981,127 +19601,116 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_u_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: +define void @s_shuffle_v4i64_v4i64__4_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = 
shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_0_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: +define void @s_shuffle_v4i64_v4i64__5_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: 
s_shuffle_v4i64_v4i64__7_7_0_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20110,67 +19719,58 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_1_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: +define void @s_shuffle_v4i64_v4i64__6_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20179,27 +19779,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void 
@s_shuffle_v4i64_v4i64__7_7_2_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: +define void @s_shuffle_v4i64_v4i64__7_3_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -20210,12 +19805,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -20226,12 +19825,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20240,23 +19843,22 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_4_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: +define void @s_shuffle_v4i64_v4i64__7_u_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -20267,14 +19869,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -20285,14 +19887,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20301,61 +19903,65 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_3() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_5_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: +define void @s_shuffle_v4i64_v4i64__7_0_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND 
; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20364,65 +19970,59 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_6_3() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: +define void @s_shuffle_v4i64_v4i64__7_1_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; 
GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20431,584 +20031,609 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: 
s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__u_4_4_4() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__0_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_2_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__1_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_4_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; 
GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use 
$0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__2_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_5_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: 
s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__3_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_6_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, 
s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__4_4_4_4() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__5_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_3_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; 
GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; 
GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__6_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_u_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: 
;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_4_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_0_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x 
i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_u_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_1_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_0_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_2_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: +; GFX90A-LABEL: 
s_shuffle_v4i64_v4i64__7_7_2_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define 
void @s_shuffle_v4i64_v4i64__7_1_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_4_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -21017,25 +20642,22 @@ define void 
@s_shuffle_v4i64_v4i64__7_1_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_2_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_5_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -21046,18 +20668,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -21068,304 +20686,240 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() { ; 
GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s16 -; GFX942-NEXT: s_mov_b32 s13, s17 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_3_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +define void @s_shuffle_v4i64_v4i64__7_7_6_3() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART 
-; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def 
s[16:23] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s16 -; GFX942-NEXT: s_mov_b32 s13, s17 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_5_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +define void @s_shuffle_v4i64_v4i64__u_4_4_4() { +; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:15] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__0_4_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s6 -; GFX900-NEXT: s_mov_b32 s11, s7 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; 
GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s6 -; GFX90A-NEXT: s_mov_b32 s11, s7 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_6_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +define void @s_shuffle_v4i64_v4i64__1_4_4_4() { +; GFX900-LABEL: 
s_shuffle_v4i64_v4i64__1_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_4_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +define void @s_shuffle_v4i64_v4i64__2_4_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; 
GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_u_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +define void @s_shuffle_v4i64_v4i64__3_4_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -21373,14 +20927,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -21388,190 +20940,171 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4: ; GFX942: ; %bb.0: ; 
GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_0_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +define void @s_shuffle_v4i64_v4i64__4_4_4_4() { +; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:15] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__5_4_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s22 -; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; 
GFX900-NEXT: s_mov_b32 s12, s4 ; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s22 -; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s4 ; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_1_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +define void @s_shuffle_v4i64_v4i64__6_4_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s22 -; GFX900-NEXT: s_mov_b32 s11, s23 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s22 -; GFX90A-NEXT: s_mov_b32 s11, s23 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 
s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_2_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +define void @s_shuffle_v4i64_v4i64__7_4_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 
+; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: s_mov_b32 s14, s4 ; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART @@ -21579,17 +21112,18 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: s_mov_b32 s14, s4 ; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART @@ -21597,46 +21131,38 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", 
"=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_3_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +define void @s_shuffle_v4i64_v4i64__7_u_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: s_mov_b32 s14, s4 ; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART @@ -21644,19 +21170,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: s_mov_b32 s14, s4 ; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART @@ -21664,107 +21187,110 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +; GFX942-LABEL: 
s_shuffle_v4i64_v4i64__7_u_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:23] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_5_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +define void @s_shuffle_v4i64_v4i64__7_0_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: 
s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 
= call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_6_4() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +define void @s_shuffle_v4i64_v4i64__7_1_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s22 -; GFX900-NEXT: s_mov_b32 s11, s23 -; GFX900-NEXT: s_mov_b32 s12, s20 -; GFX900-NEXT: s_mov_b32 s13, s21 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: s_mov_b32 s14, s16 ; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART @@ -21772,18 +21298,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s22 -; GFX90A-NEXT: s_mov_b32 s11, s23 -; GFX90A-NEXT: s_mov_b32 s12, s20 -; GFX90A-NEXT: s_mov_b32 s13, s21 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: s_mov_b32 s14, s16 ; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART @@ -21791,65 +21318,1353 @@ define void 
@s_shuffle_v4i64_v4i64__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_2_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] 
+; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_3_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: 
s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_5_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, 
s11 +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_6_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: 
s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_4_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, 
s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_u_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_0_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_1_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; 
GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_2_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_3_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; 
def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_5_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_7_6_4() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 
+; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s12, s20 +; GFX900-NEXT: s_mov_b32 s13, s21 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s12, s20 +; GFX90A-NEXT: s_mov_b32 s13, s21 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } define void @s_shuffle_v4i64_v4i64__u_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 
-; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__0_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__1_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: 
; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__2_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +; GFX900: ; %bb.0: +; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void 
@s_shuffle_v4i64_v4i64__3_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + 
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__4_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__5_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__6_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; 
GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_5_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_u_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__0_5_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +define void @s_shuffle_v4i64_v4i64__7_0_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -21857,17 +22672,19 @@ define void 
@s_shuffle_v4i64_v4i64__0_5_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -21875,74 +22692,69 @@ define void @s_shuffle_v4i64_v4i64__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector 
<4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__1_5_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +define void @s_shuffle_v4i64_v4i64__7_1_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 -; GFX900-NEXT: s_mov_b32 s14, s10 -; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 -; GFX90A-NEXT: s_mov_b32 s14, s10 -; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5: +; 
GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -21951,253 +22763,150 @@ define void @s_shuffle_v4i64_v4i64__1_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__2_5_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +define void @s_shuffle_v4i64_v4i64__7_2_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 -; GFX900-NEXT: s_mov_b32 s14, s10 -; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, 
s6 +; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 -; GFX90A-NEXT: s_mov_b32 s14, s10 -; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm 
"; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__3_5_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: +define void @s_shuffle_v4i64_v4i64__7_3_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 -; GFX900-NEXT: s_mov_b32 s14, s10 -; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 -; GFX90A-NEXT: s_mov_b32 s14, s10 -; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: 
s_shuffle_v4i64_v4i64__3_5_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__4_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__5_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def 
s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__6_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__7_5_5_5() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - 
%vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_u_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +define void @s_shuffle_v4i64_v4i64__7_4_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -22205,6 +22914,8 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: s_mov_b32 s14, s6 @@ -22214,7 +22925,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -22222,6 +22933,8 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: s_mov_b32 s14, s6 @@ -22231,43 +22944,38 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 
s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_0_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: +define void @s_shuffle_v4i64_v4i64__7_6_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -22275,19 +22983,16 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: 
s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -22295,114 +23000,92 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_1_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: +define void @s_shuffle_v4i64_v4i64__7_7_5_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; 
GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; 
GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_2_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: +define void @s_shuffle_v4i64_v4i64__7_7_u_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: s_mov_b32 s14, s6 ; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -22410,21 +23093,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: s_mov_b32 s14, s6 ; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: 
;;#ASMSTART @@ -22432,34 +23108,28 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_3_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: +define void @s_shuffle_v4i64_v4i64__7_7_0_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -22470,14 +23140,16 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; 
GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -22488,174 +23160,176 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_4_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: +define void 
@s_shuffle_v4i64_v4i64__7_7_1_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, 
s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_6_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: +define void @s_shuffle_v4i64_v4i64__7_7_2_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: 
;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_5_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: +define void @s_shuffle_v4i64_v4i64__7_7_3_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: ; 
GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: s_mov_b32 s14, s6 ; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -22663,16 +23337,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: s_mov_b32 s14, s6 ; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -22680,33 +23357,31 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: 
s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_u_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: +define void @s_shuffle_v4i64_v4i64__7_7_4_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -22714,6 +23389,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: s_mov_b32 s14, s6 ; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -22721,7 +23398,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -22729,6 +23406,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: s_mov_b32 s14, s6 ; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -22736,201 +23415,175 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: 
s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_0_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: +define void @s_shuffle_v4i64_v4i64__7_7_6_5() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s18 ; GFX900-NEXT: s_mov_b32 s11, s19 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: ; 
GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s18 ; GFX90A-NEXT: s_mov_b32 s11, s19 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_1_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: +define void @s_shuffle_v4i64_v4i64__u_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s18 -; GFX900-NEXT: s_mov_b32 s11, s19 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s18 -; GFX90A-NEXT: s_mov_b32 s11, s19 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 
s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_2_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: +define void @s_shuffle_v4i64_v4i64__0_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; 
GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -22939,311 +23592,271 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_3_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: +define void @s_shuffle_v4i64_v4i64__1_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; 
GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s6 -; 
GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_4_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: +define void @s_shuffle_v4i64_v4i64__2_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, 
s5 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__7_7_6_5() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: +define void @s_shuffle_v4i64_v4i64__3_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s18 -; GFX900-NEXT: s_mov_b32 s11, s19 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s18 -; GFX90A-NEXT: s_mov_b32 s11, s19 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__u_6_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__0_6_6_6() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: +define void @s_shuffle_v4i64_v4i64__4_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 
+; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> 
%shuf) ret void } -define void @s_shuffle_v4i64_v4i64__1_6_6_6() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: +define void @s_shuffle_v4i64_v4i64__5_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -23253,17 +23866,14 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -23273,44 +23883,35 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 
s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__2_6_6_6() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: +define void @s_shuffle_v4i64_v4i64__6_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -23320,17 +23921,14 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; 
GFX90A-NEXT: s_mov_b32 s14, s12 @@ -23340,44 +23938,35 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) ret void } -define void @s_shuffle_v4i64_v4i64__3_6_6_6() { -; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: +define void @s_shuffle_v4i64_v4i64__7_6_6_6() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -23387,17 +23976,14 @@ define void 
@s_shuffle_v4i64_v4i64__3_6_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -23407,119 +23993,19 @@ define void @s_shuffle_v4i64_v4i64__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__4_6_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__5_6_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__6_6_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - 
%vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__7_6_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23528,20 +24014,48 @@ define void @s_shuffle_v4i64_v4i64__7_6_6_6() { } define void @s_shuffle_v4i64_v4i64__7_u_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6: +; 
GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23599,12 +24113,9 @@ define void @s_shuffle_v4i64_v4i64__7_0_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23666,12 +24177,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], 
s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23733,12 +24241,9 @@ define void @s_shuffle_v4i64_v4i64__7_2_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23800,12 +24305,9 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23862,14 +24364,10 @@ define void @s_shuffle_v4i64_v4i64__7_4_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23882,20 +24380,48 @@ define void 
@s_shuffle_v4i64_v4i64__7_4_6_6() { } define void @s_shuffle_v4i64_v4i64__7_5_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector 
<4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23904,22 +24430,53 @@ define void @s_shuffle_v4i64_v4i64__7_5_6_6() { } define void @s_shuffle_v4i64_v4i64__7_7_6_6() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -23968,12 +24525,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24039,14 +24593,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24112,14 +24662,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; 
GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24181,12 +24727,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24252,14 +24795,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24312,14 +24851,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24376,14 +24911,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24396,20 +24927,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() { } define void @s_shuffle_v4i64_v4i64__u_7_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: 
s_shuffle_v4i64_v4i64__u_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -24467,12 +25026,9 @@ define void @s_shuffle_v4i64_v4i64__0_7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24534,12 +25090,9 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24601,12 +25154,9 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24659,117 +25209,232 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7: +; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__4_7_7_7() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__5_7_7_7() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__6_7_7_7() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x i64> asm "; def $0", "=s"() + %vec1 = call <4 x i64> asm "; def $0", "=s"() + %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) + ret void +} + +define void @s_shuffle_v4i64_v4i64__7_u_7_7() { +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__4_7_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__5_7_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] 
-; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__6_7_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x i64> asm "; def $0", "=s"() - %vec1 = call <4 x i64> asm "; def $0", "=s"() - %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf) - ret void -} - -define void @s_shuffle_v4i64_v4i64__7_u_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> 
%vec0, <4 x i64> %vec1, <4 x i32> @@ -24827,12 +25492,9 @@ define void @s_shuffle_v4i64_v4i64__7_0_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24894,12 +25556,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24961,12 +25620,9 @@ define void @s_shuffle_v4i64_v4i64__7_2_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25028,12 +25684,9 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; 
GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25090,14 +25743,10 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25110,20 +25759,48 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() { } define void @s_shuffle_v4i64_v4i64__7_5_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25132,22 +25809,53 @@ define void @s_shuffle_v4i64_v4i64__7_5_7_7() { } define void @s_shuffle_v4i64_v4i64__7_6_7_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: 
s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25156,20 +25864,48 @@ define void @s_shuffle_v4i64_v4i64__7_6_7_7() { } define void @s_shuffle_v4i64_v4i64__7_7_u_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: 
s_shuffle_v4i64_v4i64__7_7_u_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> @@ -25227,12 +25963,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25294,12 +26027,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25361,12 +26091,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25428,12 +26155,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25486,14 +26210,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 
s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25550,14 +26270,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25570,20 +26286,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() { } define void @s_shuffle_v4i64_v4i64__7_7_6_7() { -; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: 
s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x i64> asm "; def $0", "=s"() %vec1 = call <4 x i64> asm "; def $0", "=s"() %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll index b30af835a7882..14bacc2f74876 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll @@ -3734,8 +3734,7 @@ define void @s_shuffle_v4p0_v2p0__1_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3793,8 +3792,7 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3852,10 +3850,8 @@ define void @s_shuffle_v4p0_v2p0__3_0_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3909,8 +3905,7 @@ define void @s_shuffle_v4p0_v2p0__3_1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3959,10 +3954,8 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -3975,18 +3968,43 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() { } define void @s_shuffle_v4p0_v2p0__3_3_u_u() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -3995,21 +4013,52 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_u() { } define void @s_shuffle_v4p0_v2p0__3_3_0_u() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: 
;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4063,10 +4112,8 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4079,20 +4126,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() { } define void @s_shuffle_v4p0_v2p0__3_3_2_u() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u: 
-; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4101,20 +4176,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_u() { } define void 
@s_shuffle_v4p0_v2p0__3_3_3_u() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4172,12 
+4275,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4190,23 +4290,57 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() { } define void @s_shuffle_v4p0_v2p0__3_3_3_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; 
def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -4259,14 +4393,10 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4279,44 +4409,103 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() { } define void @s_shuffle_v4p0_v2p0__3_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 
s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x ptr> asm "; def $0", "=s"() - %vec1 = call <2 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v2p0__u_0_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use 
s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x ptr> asm "; def $0", "=s"() + %vec1 = call <2 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v2p0__u_0_0_0() { +; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: 
s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4324,22 +4513,53 @@ define void @s_shuffle_v4p0_v2p0__u_0_0_0() { } define void @s_shuffle_v4p0_v2p0__0_0_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: s_mov_b32 s14, s8 +; GFX900-NEXT: s_mov_b32 s15, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: s_mov_b32 s14, s8 +; GFX90A-NEXT: s_mov_b32 s15, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; 
GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4347,22 +4567,53 @@ define void @s_shuffle_v4p0_v2p0__0_0_0_0() { } define void @s_shuffle_v4p0_v2p0__1_0_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4370,20 +4621,48 @@ define void @s_shuffle_v4p0_v2p0__1_0_0_0() { } define void @s_shuffle_v4p0_v2p0__2_0_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4440,12 +4719,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4503,10 +4779,8 @@ define void @s_shuffle_v4p0_v2p0__3_u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s14, s12 -; 
GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4568,12 +4842,9 @@ define void @s_shuffle_v4p0_v2p0__3_1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4635,12 +4906,9 @@ define void @s_shuffle_v4p0_v2p0__3_2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4653,57 +4921,91 @@ define void @s_shuffle_v4p0_v2p0__3_2_0_0() { } define void @s_shuffle_v4p0_v2p0__3_3_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x ptr> asm "; def 
$0", "=s"() - %vec1 = call <2 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v2p0__3_3_u_0() { -; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0: +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ; def s[12:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ; def s[8:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0: +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ; def s[12:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:7] +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], 
s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x ptr> asm "; def $0", "=s"() + %vec1 = call <2 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v2p0__3_3_u_0() { +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:7] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 @@ -4723,10 +5025,8 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4788,12 +5088,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 
-; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4855,12 +5152,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -4873,20 +5167,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() { } define void @s_shuffle_v4p0_v2p0__u_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1: +; GFX90A: 
; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4894,20 +5216,48 @@ define void @s_shuffle_v4p0_v2p0__u_1_1_1() { } define void @s_shuffle_v4p0_v2p0__0_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND 
+; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4915,22 +5265,53 @@ define void @s_shuffle_v4p0_v2p0__0_1_1_1() { } define void @s_shuffle_v4p0_v2p0__1_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 
s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -4938,20 +5319,48 @@ define void @s_shuffle_v4p0_v2p0__1_1_1_1() { } define void @s_shuffle_v4p0_v2p0__2_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 
s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -5008,12 +5417,9 @@ define void @s_shuffle_v4p0_v2p0__3_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; 
GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5071,10 +5477,8 @@ define void @s_shuffle_v4p0_v2p0__3_u_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5136,12 +5540,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5203,12 +5604,9 @@ define void @s_shuffle_v4p0_v2p0__3_2_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5221,46 +5619,111 @@ define void @s_shuffle_v4p0_v2p0__3_2_1_1() { } define void @s_shuffle_v4p0_v2p0__3_3_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x ptr> asm "; def $0", "=s"() - %vec1 = call <2 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; 
GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x ptr> asm "; def $0", "=s"() + %vec1 = call <2 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} define void @s_shuffle_v4p0_v2p0__3_3_u_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5269,21 +5732,52 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_1() { } define void @s_shuffle_v4p0_v2p0__3_3_0_1() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use 
s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5341,12 +5835,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5445,8 +5936,7 @@ define void @s_shuffle_v4p0_v2p0__1_2_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5472,22 +5962,53 @@ define void @s_shuffle_v4p0_v2p0__2_2_2_2() { } define void @s_shuffle_v4p0_v2p0__3_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, 
s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5496,20 +6017,48 @@ define void @s_shuffle_v4p0_v2p0__3_2_2_2() { } define void 
@s_shuffle_v4p0_v2p0__3_u_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5567,12 
+6116,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5585,23 +6131,57 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() { } define void @s_shuffle_v4p0_v2p0__3_1_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; 
def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5610,22 +6190,53 @@ define void @s_shuffle_v4p0_v2p0__3_1_2_2() { } define void @s_shuffle_v4p0_v2p0__3_3_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 
+; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5674,12 +6285,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5741,12 +6349,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5812,14 +6417,10 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5832,20 +6433,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() { } define void @s_shuffle_v4p0_v2p0__u_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5854,23 +6483,57 @@ define void @s_shuffle_v4p0_v2p0__u_3_3_3() { } define void @s_shuffle_v4p0_v2p0__0_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND 
+; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -5928,12 +6591,9 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], 
s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -5946,78 +6606,134 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() { } define void @s_shuffle_v4p0_v2p0__2_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x ptr> asm "; def $0", "=s"() - %vec1 = call <2 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v2p0__3_u_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <2 x ptr> asm "; def $0", "=s"() - %vec1 = call <2 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v2p0__3_0_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3: +; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: 
;;#ASMSTART -; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ; def s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ; def s[8:11] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:7] +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x ptr> asm "; def $0", "=s"() + %vec1 = call <2 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v2p0__3_u_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART 
+; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <2 x ptr> asm "; def $0", "=s"() + %vec1 = call <2 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v2p0__3_0_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:7] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 
+; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:7] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s14 ; GFX90A-NEXT: s_mov_b32 s9, s15 @@ -6039,12 +6755,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -6057,23 +6770,57 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() { } define void @s_shuffle_v4p0_v2p0__3_1_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 
s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -6082,22 +6829,53 @@ define void @s_shuffle_v4p0_v2p0__3_1_3_3() { } define void @s_shuffle_v4p0_v2p0__3_2_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: 
; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -6106,20 +6884,48 @@ define void @s_shuffle_v4p0_v2p0__3_2_3_3() { } define void @s_shuffle_v4p0_v2p0__3_3_u_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -6128,23 +6934,57 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_3() { } define void @s_shuffle_v4p0_v2p0__3_3_0_3() { -; GFX9-LABEL: 
s_shuffle_v4p0_v2p0__3_3_0_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:11] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; 
GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> @@ -6202,12 +7042,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:3] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -6220,20 +7057,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() { } define void @s_shuffle_v4p0_v2p0__3_3_2_3() { -; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[12:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <2 x ptr> asm "; def $0", "=s"() %vec1 = call <2 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll index e6ac554735eee..0398418b82f3d 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll @@ -7818,8 +7818,7 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -7859,8 +7858,7 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -7918,8 +7916,7 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -7960,8 +7957,7 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8016,11 +8012,11 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8070,8 +8066,7 @@ define void @s_shuffle_v4p0_v3p0__5_1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8125,10 +8120,8 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8173,10 +8166,8 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; 
GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8189,18 +8180,43 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() { } define void @s_shuffle_v4p0_v3p0__5_4_u_u() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = 
shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8245,10 +8261,8 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8310,12 +8324,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8377,12 +8388,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8440,10 +8448,8 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], 
s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8492,12 +8498,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8550,12 +8553,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8568,20 +8568,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() { } define void @s_shuffle_v4p0_v3p0__5_5_5_u() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, 
s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8639,12 +8667,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8706,12 +8731,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8773,12 +8795,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8835,14 +8854,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8895,14 +8910,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 
s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8915,22 +8926,53 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() { } define void @s_shuffle_v4p0_v3p0__5_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; 
GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -8979,12 +9021,9 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -8996,22 +9035,53 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() { } define void @s_shuffle_v4p0_v3p0__0_0_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: s_mov_b32 s14, s8 +; GFX900-NEXT: s_mov_b32 s15, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: s_mov_b32 s14, s8 +; GFX90A-NEXT: s_mov_b32 s15, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -9063,14 +9133,10 @@ define void @s_shuffle_v4p0_v3p0__1_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; 
GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9122,14 +9188,10 @@ define void @s_shuffle_v4p0_v3p0__2_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9181,12 +9243,9 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9248,17 +9307,13 @@ define void @s_shuffle_v4p0_v3p0__4_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; 
GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9321,15 +9376,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9388,13 +9440,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9457,15 +9507,12 @@ define void @s_shuffle_v4p0_v3p0__5_1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9527,14 +9574,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9597,15 +9640,12 @@ define void @s_shuffle_v4p0_v3p0__5_3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9667,12 +9707,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9738,14 +9775,10 @@ define void 
@s_shuffle_v4p0_v3p0__5_5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9807,12 +9840,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9878,14 +9908,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -9951,14 +9977,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; 
GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10020,12 +10042,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10091,14 +10110,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10111,126 +10126,12 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() { } define void @s_shuffle_v4p0_v3p0__u_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1: 
-; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v3p0__0_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v3p0__1_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", 
"{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v3p0__2_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v3p0__3_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <3 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v3p0__4_1_1_1() { -; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1: +; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:13] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:9] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s12, s10 ; GFX900-NEXT: s_mov_b32 s13, s11 ; 
GFX900-NEXT: s_mov_b32 s14, s10 @@ -10240,13 +10141,273 @@ define void @s_shuffle_v4p0_v3p0__4_1_1_1() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1: +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:13] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v3p0__0_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; 
GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v3p0__1_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; 
GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v3p0__2_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v3p0__3_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <3 x ptr> asm "; def $0", "=s"() + %shuf = 
shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v3p0__4_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:9] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:9] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s6 @@ -10269,12 +10430,9 @@ define void @s_shuffle_v4p0_v3p0__4_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10332,12 +10490,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; 
GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10396,13 +10551,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10465,15 +10618,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10531,14 +10681,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 
s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10601,15 +10747,12 @@ define void @s_shuffle_v4p0_v3p0__5_3_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10671,12 +10814,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10742,14 +10882,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10811,12 +10947,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10882,14 +11015,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -10951,14 +11080,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11020,12 +11145,9 @@ define 
void @s_shuffle_v4p0_v3p0__5_5_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11091,14 +11213,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11111,20 +11229,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() { } define void @s_shuffle_v4p0_v3p0__u_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; 
GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -11132,20 +11278,48 @@ define void @s_shuffle_v4p0_v3p0__u_2_2_2() { } define void @s_shuffle_v4p0_v3p0__0_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -11153,22 +11327,53 @@ define void @s_shuffle_v4p0_v3p0__0_2_2_2() { } define void @s_shuffle_v4p0_v3p0__1_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use 
s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -11176,22 +11381,53 @@ define void @s_shuffle_v4p0_v3p0__1_2_2_2() { } define void @s_shuffle_v4p0_v3p0__2_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", 
"=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -11199,20 +11435,48 @@ define void @s_shuffle_v4p0_v3p0__2_2_2_2() { } define void @s_shuffle_v4p0_v3p0__3_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; 
GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -11269,12 +11533,9 @@ define void @s_shuffle_v4p0_v3p0__4_2_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11332,12 +11593,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11391,10 +11649,8 @@ define void @s_shuffle_v4p0_v3p0__5_u_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11456,14 +11712,10 @@ define void @s_shuffle_v4p0_v3p0__5_0_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11517,10 +11769,8 @@ define void @s_shuffle_v4p0_v3p0__5_1_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11578,12 +11828,9 @@ define void @s_shuffle_v4p0_v3p0__5_3_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11645,12 +11892,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 
s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11712,12 +11956,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11779,12 +12020,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11846,14 +12084,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11919,14 +12153,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_2() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -11992,12 +12222,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12063,14 +12290,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12169,8 +12392,7 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND 
-; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12210,8 +12432,7 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12281,14 +12502,10 @@ define void @s_shuffle_v4p0_v3p0__4_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12341,14 +12558,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12397,12 +12610,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12465,15 +12675,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12531,12 +12738,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12598,14 +12802,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:21] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s20 -; GFX942-NEXT: s_mov_b32 s9, s21 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s16 -; GFX942-NEXT: s_mov_b32 s13, s17 
-; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12658,14 +12858,10 @@ define void @s_shuffle_v4p0_v3p0__5_4_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12722,14 +12918,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12782,12 +12974,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; 
GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12853,14 +13042,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:21] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s20 -; GFX942-NEXT: s_mov_b32 s9, s21 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12926,14 +13111,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[16:21] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s20 -; GFX942-NEXT: s_mov_b32 s9, s21 -; GFX942-NEXT: s_mov_b32 s10, s20 -; GFX942-NEXT: s_mov_b32 s11, s21 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21] +; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -12995,12 +13176,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13057,14 +13235,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13077,20 +13251,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() { } define void @s_shuffle_v4p0_v3p0__u_4_4_4() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13144,12 +13346,9 @@ define void @s_shuffle_v4p0_v3p0__0_4_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13211,12 +13410,9 @@ define void @s_shuffle_v4p0_v3p0__1_4_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13278,12 +13474,9 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13296,20 +13489,48 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() { } define void @s_shuffle_v4p0_v3p0__3_4_4_4() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; 
GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13318,22 +13539,53 @@ define void @s_shuffle_v4p0_v3p0__3_4_4_4() { } define void @s_shuffle_v4p0_v3p0__4_4_4_4() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13342,22 +13594,53 @@ define void @s_shuffle_v4p0_v3p0__4_4_4_4() { } define void @s_shuffle_v4p0_v3p0__5_4_4_4() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; 
GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -13402,12 +13685,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13466,15 +13746,12 @@ define void 
@s_shuffle_v4p0_v3p0__5_0_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:9] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13532,12 +13809,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13599,12 +13873,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13657,14 +13928,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; 
GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13717,14 +13984,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13773,12 +14036,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13840,12 +14100,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: 
s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13907,12 +14164,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13974,12 +14228,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14028,14 +14279,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use 
s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14048,20 +14295,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() { } define void @s_shuffle_v4p0_v3p0__u_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() 
%vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14119,12 +14394,9 @@ define void @s_shuffle_v4p0_v3p0__0_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14186,12 +14458,9 @@ define void @s_shuffle_v4p0_v3p0__1_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14253,12 +14522,9 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14271,20 +14537,48 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() { } define void @s_shuffle_v4p0_v3p0__3_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5: -; GFX9: ; %bb.0: 
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14293,22 +14587,53 @@ define void @s_shuffle_v4p0_v3p0__3_5_5_5() { } define void 
@s_shuffle_v4p0_v3p0__4_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: 
; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14317,20 +14642,48 @@ define void @s_shuffle_v4p0_v3p0__4_5_5_5() { } define void @s_shuffle_v4p0_v3p0__5_u_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: 
s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14388,12 +14741,9 @@ define void @s_shuffle_v4p0_v3p0__5_0_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14455,12 +14805,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14522,12 +14869,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s12 -; GFX942-NEXT: s_mov_b32 s9, s13 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: 
;;#ASMEND @@ -14584,14 +14928,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14604,20 +14944,48 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() { } define void @s_shuffle_v4p0_v3p0__5_4_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:13] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:13] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:13] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; 
GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:13] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <3 x ptr> asm "; def $0", "=s"() %vec1 = call <3 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> @@ -14666,12 +15034,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14737,14 +15102,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; 
GFX942-NEXT: ;;#ASMEND @@ -14810,14 +15171,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s16 -; GFX942-NEXT: s_mov_b32 s9, s17 -; GFX942-NEXT: s_mov_b32 s10, s16 -; GFX942-NEXT: s_mov_b32 s11, s17 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17] +; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14879,12 +15236,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14937,14 +15291,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15001,14 +15351,10 @@ define void 
@s_shuffle_v4p0_v3p0__5_5_4_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:5] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll index ce1c54129f706..5e61b0b51e280 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll @@ -13289,8 +13289,7 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13330,8 +13329,7 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13375,8 +13373,7 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13434,8 +13431,7 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13476,8 +13472,7 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13522,8 +13517,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13582,10 +13576,8 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13639,8 +13631,7 @@ define void @s_shuffle_v4p0_v4p0__7_1_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13698,10 +13689,8 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; 
GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13755,10 +13744,8 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13807,10 +13794,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13823,18 +13808,43 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() { } define void @s_shuffle_v4p0_v4p0__7_5_u_u() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -13879,10 +13889,8 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13927,10 +13935,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -13989,13 +13995,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14054,13 +14058,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14114,10 +14116,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14175,12 +14175,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14229,12 +14226,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14283,12 +14277,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14301,20 +14292,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() { } define void @s_shuffle_v4p0_v4p0__7_7_6_u() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; 
use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14363,12 +14382,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14434,14 +14450,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 
s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14507,14 +14519,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14580,14 +14588,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14649,12 +14653,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; 
GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14711,14 +14712,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14771,14 +14768,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14835,14 +14828,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 
s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14855,22 +14844,53 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() { } define void @s_shuffle_v4p0_v4p0__7_7_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; 
GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -14919,12 +14939,9 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -14936,22 +14953,53 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() { } define void @s_shuffle_v4p0_v4p0__0_0_0_0() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s8 -; GFX9-NEXT: s_mov_b32 s11, s9 -; GFX9-NEXT: s_mov_b32 s12, s8 -; GFX9-NEXT: s_mov_b32 s13, s9 -; GFX9-NEXT: s_mov_b32 s14, s8 -; GFX9-NEXT: s_mov_b32 s15, s9 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; 
GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s8 +; GFX900-NEXT: s_mov_b32 s11, s9 +; GFX900-NEXT: s_mov_b32 s12, s8 +; GFX900-NEXT: s_mov_b32 s13, s9 +; GFX900-NEXT: s_mov_b32 s14, s8 +; GFX900-NEXT: s_mov_b32 s15, s9 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s8 +; GFX90A-NEXT: s_mov_b32 s11, s9 +; GFX90A-NEXT: s_mov_b32 s12, s8 +; GFX90A-NEXT: s_mov_b32 s13, s9 +; GFX90A-NEXT: s_mov_b32 s14, s8 +; GFX90A-NEXT: s_mov_b32 s15, s9 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9] +; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9] +; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> zeroinitializer call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -15003,14 +15051,10 @@ define void @s_shuffle_v4p0_v4p0__1_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; 
GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15062,14 +15106,10 @@ define void @s_shuffle_v4p0_v4p0__2_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15125,14 +15165,10 @@ define void @s_shuffle_v4p0_v4p0__3_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15184,12 +15220,9 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; 
GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15251,17 +15284,13 @@ define void @s_shuffle_v4p0_v4p0__5_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15324,15 +15353,12 @@ define void @s_shuffle_v4p0_v4p0__6_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15395,17 +15421,13 @@ define void @s_shuffle_v4p0_v4p0__7_0_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: 
s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15464,15 +15486,12 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15535,17 +15554,13 @@ define void @s_shuffle_v4p0_v4p0__7_1_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15611,14 +15626,10 @@ define void @s_shuffle_v4p0_v4p0__7_2_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; 
GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15680,14 +15691,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15750,17 +15757,13 @@ define void @s_shuffle_v4p0_v4p0__7_4_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15822,12 +15825,9 @@ define void @s_shuffle_v4p0_v4p0__7_5_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15893,14 +15893,10 @@ define void @s_shuffle_v4p0_v4p0__7_6_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -15963,15 +15959,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16030,13 +16023,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16099,15 +16090,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16169,14 +16157,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16238,14 +16222,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: 
s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16304,15 +16284,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16375,15 +16352,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s0 +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16445,12 +16419,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], 
s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16463,20 +16434,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() { } define void @s_shuffle_v4p0_v4p0__u_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -16484,89 +16483,210 @@ define void @s_shuffle_v4p0_v4p0__u_1_1_1() { } define void @s_shuffle_v4p0_v4p0__0_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__1_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__2_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void 
@s_shuffle_v4p0_v4p0__1_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__2_1_1_1() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def 
s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } define void @s_shuffle_v4p0_v4p0__3_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; 
GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -16574,20 +16694,48 @@ define void @s_shuffle_v4p0_v4p0__3_1_1_1() { } define void @s_shuffle_v4p0_v4p0__4_1_1_1() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1: -; 
GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) @@ -16644,12 +16792,9 @@ define void @s_shuffle_v4p0_v4p0__5_1_1_1() 
{ ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16711,12 +16856,9 @@ define void @s_shuffle_v4p0_v4p0__6_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16778,12 +16920,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16842,15 +16981,12 @@ define void @s_shuffle_v4p0_v4p0__7_u_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: 
s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16913,17 +17049,13 @@ define void @s_shuffle_v4p0_v4p0__7_0_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -16985,14 +17117,10 @@ define void @s_shuffle_v4p0_v4p0__7_2_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17054,14 +17182,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: 
s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17124,17 +17248,13 @@ define void @s_shuffle_v4p0_v4p0__7_4_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17196,12 +17316,9 @@ define void @s_shuffle_v4p0_v4p0__7_5_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17267,14 +17384,10 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s12 -; 
GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17337,15 +17450,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17404,13 +17514,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17473,15 +17581,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: 
s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17539,14 +17644,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17604,14 +17705,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17670,15 +17767,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; 
GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17741,15 +17835,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s14, s2 +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17811,12 +17902,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -17829,149 +17917,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() { } define void @s_shuffle_v4p0_v4p0__u_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: 
;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__0_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__1_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__2_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; 
GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__3_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__4_2_2_2() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__5_2_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: +; GFX900-LABEL: 
s_shuffle_v4p0_v4p0__u_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -17981,17 +17932,12 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -18001,42 +17947,31 @@ define void @s_shuffle_v4p0_v4p0__5_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() 
- %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__6_2_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: +define void @s_shuffle_v4p0_v4p0__0_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -18046,15 +17981,12 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -18064,42 +17996,31 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 
-; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_2_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: +define void @s_shuffle_v4p0_v4p0__1_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s10, s12 @@ -18111,15 +18032,12 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s10, s12 @@ -18131,44 +18049,36 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_u_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: +define void @s_shuffle_v4p0_v4p0__2_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18176,17 +18086,16 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; 
GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18194,115 +18103,88 @@ define void @s_shuffle_v4p0_v4p0__7_u_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_0_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: +define void @s_shuffle_v4p0_v4p0__3_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: 
;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: 
s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_1_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: +define void @s_shuffle_v4p0_v4p0__4_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18310,17 +18192,14 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18328,32 +18207,26 @@ 
define void @s_shuffle_v4p0_v4p0__7_1_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_3_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: +define void @s_shuffle_v4p0_v4p0__5_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -18362,10 +18235,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18373,7 +18246,7 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() { ; GFX900-NEXT: ;;#ASMEND ; 
GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -18382,10 +18255,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18393,7 +18266,7 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18402,25 +18275,22 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define 
void @s_shuffle_v4p0_v4p0__7_4_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: +define void @s_shuffle_v4p0_v4p0__6_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -18429,10 +18299,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18440,7 +18308,7 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -18449,10 +18317,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18460,7 +18326,7 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18469,65 +18335,62 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_5_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: +define void @s_shuffle_v4p0_v4p0__7_2_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def 
s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18536,37 +18399,32 @@ define void @s_shuffle_v4p0_v4p0__7_5_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_6_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: +define void @s_shuffle_v4p0_v4p0__7_u_2_2() { +; GFX900-LABEL: 
s_shuffle_v4p0_v4p0__7_u_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18574,19 +18432,17 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18594,7 +18450,7 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18603,61 +18459,65 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: 
s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_2_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: +define void @s_shuffle_v4p0_v4p0__7_0_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s12 -; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s12 -; GFX90A-NEXT: 
s_mov_b32 s15, s13 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18666,35 +18526,33 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_u_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: +define void @s_shuffle_v4p0_v4p0__7_1_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: 
s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 ; GFX900-NEXT: s_mov_b32 s14, s12 ; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART @@ -18702,17 +18560,17 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 ; GFX90A-NEXT: s_mov_b32 s14, s12 ; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART @@ -18720,7 +18578,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18729,61 +18587,61 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, 
<4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_0_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: +define void @s_shuffle_v4p0_v4p0__7_3_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18792,67 +18650,62 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_2() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_1_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: +define void @s_shuffle_v4p0_v4p0__7_4_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: 
;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18861,39 +18714,34 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_3_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: +define void @s_shuffle_v4p0_v4p0__7_5_2_2() { +; GFX900-LABEL: 
s_shuffle_v4p0_v4p0__7_5_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: s_mov_b32 s14, s16 ; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART @@ -18901,19 +18749,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: s_mov_b32 s14, s16 ; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART @@ -18921,7 +18769,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -18930,203 +18778,182 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_4_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: +define void @s_shuffle_v4p0_v4p0__7_6_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_5_2() { -; GFX900-LABEL: 
s_shuffle_v4p0_v4p0__7_7_5_2: +define void @s_shuffle_v4p0_v4p0__7_7_2_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, 
s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_6_2() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: +define void @s_shuffle_v4p0_v4p0__7_7_u_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; 
GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19135,197 +18962,58 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_2() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__u_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; 
GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__0_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__1_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__2_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 
-; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__3_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__4_3_3_3() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void 
@s_shuffle_v4p0_v4p0__5_3_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: +define void @s_shuffle_v4p0_v4p0__7_7_0_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19334,61 +19022,63 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__6_3_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: +define void @s_shuffle_v4p0_v4p0__7_7_1_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; 
GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19397,65 +19087,63 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_3_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: +define void @s_shuffle_v4p0_v4p0__7_7_3_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; 
GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19464,86 +19152,87 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_u_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: +define void @s_shuffle_v4p0_v4p0__7_7_4_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s16 
+; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_0_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: +define void @s_shuffle_v4p0_v4p0__7_7_5_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -19554,18 +19243,16 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, 
s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -19576,83 +19263,81 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> 
%vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_1_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: +define void @s_shuffle_v4p0_v4p0__7_7_6_2() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; 
GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -19661,35 +19346,29 @@ define void @s_shuffle_v4p0_v4p0__7_1_3_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_2_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: +define void @s_shuffle_v4p0_v4p0__u_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19697,19 +19376,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3: ; 
GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19717,46 +19391,33 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_4_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: +define void @s_shuffle_v4p0_v4p0__0_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19764,19 +19425,14 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19784,113 +19440,89 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_5_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: +define void @s_shuffle_v4p0_v4p0__1_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s12, s18 -; GFX900-NEXT: s_mov_b32 s13, s19 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s12, s18 -; GFX90A-NEXT: s_mov_b32 s13, s19 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; 
GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_6_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: +define void @s_shuffle_v4p0_v4p0__2_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: 
s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19898,19 +19530,16 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19918,44 +19547,36 @@ define void @s_shuffle_v4p0_v4p0__7_6_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", 
"=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_3_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: +define void @s_shuffle_v4p0_v4p0__3_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -19963,17 +19584,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -19981,127 +19601,116 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_u_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: +define void @s_shuffle_v4p0_v4p0__4_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; 
GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_0_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: +define void @s_shuffle_v4p0_v4p0__5_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: 
s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20110,67 +19719,58 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 
s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_1_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: +define void @s_shuffle_v4p0_v4p0__6_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: +; GFX942-LABEL: 
s_shuffle_v4p0_v4p0__6_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20179,27 +19779,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_2_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: +define void @s_shuffle_v4p0_v4p0__7_3_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -20210,12 +19805,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX90A-NEXT: ;;#ASMSTART @@ -20226,12 +19825,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20240,23 +19843,22 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_4_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: +define void @s_shuffle_v4p0_v4p0__7_u_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -20267,14 +19869,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, 
s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -20285,14 +19887,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20301,61 +19903,65 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_5_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: +define void 
@s_shuffle_v4p0_v4p0__7_0_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20364,65 +19970,59 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: 
s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_6_3() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: +define void @s_shuffle_v4p0_v4p0__7_1_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s14 -; GFX900-NEXT: s_mov_b32 s9, s15 -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 -; GFX900-NEXT: s_mov_b32 s14, s18 -; GFX900-NEXT: s_mov_b32 s15, s19 +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; 
GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s14 -; GFX90A-NEXT: s_mov_b32 s9, s15 -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 -; GFX90A-NEXT: s_mov_b32 s14, s18 -; GFX90A-NEXT: s_mov_b32 s15, s19 +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -20431,584 +20031,609 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_3() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__u_4_4_4() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + 
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__0_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_2_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] 
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__1_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_4_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_setpc_b64 s[30:31] -; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, 
s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__2_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_5_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s18 +; GFX900-NEXT: s_mov_b32 s13, s19 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s18 +; GFX90A-NEXT: s_mov_b32 s13, s19 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__3_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_6_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def 
s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void 
@s_shuffle_v4p0_v4p0__4_4_4_4() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__5_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_3_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; 
GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__6_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_u_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 
s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = 
shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_4_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_0_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3: ; GFX942: ; %bb.0: ; 
GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_u_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_1_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: 
s_shuffle_v4p0_v4p0__7_u_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_0_4_4() { -; GFX900-LABEL: 
s_shuffle_v4p0_v4p0__7_0_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_2_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; 
GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_1_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_4_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -21017,25 +20642,22 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_2_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_5_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: ; GFX900: ; %bb.0: ; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -21046,18 +20668,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -21068,304 +20686,240 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s16 -; GFX942-NEXT: s_mov_b32 s13, s17 -; 
GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_3_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +define void @s_shuffle_v4p0_v4p0__7_7_6_3() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s18 +; GFX900-NEXT: s_mov_b32 s15, s19 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def 
s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s18 +; GFX90A-NEXT: s_mov_b32 s15, s19 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s16 -; GFX942-NEXT: s_mov_b32 s13, s17 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_5_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +define void @s_shuffle_v4p0_v4p0__u_4_4_4() { +; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4: +; GFX9: ; %bb.0: +; 
GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:15] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__0_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s6 -; GFX900-NEXT: s_mov_b32 s11, s7 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s6 -; GFX90A-NEXT: s_mov_b32 s11, s7 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_6_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +define void @s_shuffle_v4p0_v4p0__1_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s20 -; GFX900-NEXT: s_mov_b32 s11, s21 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s20 -; GFX90A-NEXT: s_mov_b32 s11, s21 -; GFX90A-NEXT: s_mov_b32 
s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_4_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +define void @s_shuffle_v4p0_v4p0__2_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +; 
GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_u_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +define void @s_shuffle_v4p0_v4p0__3_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -21373,14 +20927,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s4 -; 
GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -21388,190 +20940,171 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_0_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +define void @s_shuffle_v4p0_v4p0__4_4_4_4() { +; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: ;;#ASMSTART +; GFX9-NEXT: ; use s[8:15] +; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_setpc_b64 s[30:31] + %vec0 = call 
<4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__5_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s22 -; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s4 ; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s22 -; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s4 ; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: 
;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_1_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +define void @s_shuffle_v4p0_v4p0__6_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[16:23] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s22 -; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s22 -; GFX900-NEXT: s_mov_b32 s11, s23 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: 
s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[16:23] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s22 -; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s22 -; GFX90A-NEXT: s_mov_b32 s11, s23 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", 
"=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_2_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +define void @s_shuffle_v4p0_v4p0__7_4_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: s_mov_b32 s14, s4 ; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART @@ -21579,17 +21112,18 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: s_mov_b32 s14, s4 ; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART @@ -21597,46 +21131,38 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4: ; GFX942: ; %bb.0: 
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_3_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +define void @s_shuffle_v4p0_v4p0__7_u_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: s_mov_b32 s14, s4 ; GFX900-NEXT: s_mov_b32 s15, s5 ; GFX900-NEXT: ;;#ASMSTART @@ -21644,19 +21170,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: s_mov_b32 s14, s4 ; GFX90A-NEXT: s_mov_b32 s15, s5 ; GFX90A-NEXT: ;;#ASMSTART @@ -21664,107 +21187,110 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[16:23] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s22 -; GFX942-NEXT: s_mov_b32 s9, s23 -; GFX942-NEXT: s_mov_b32 s10, s22 -; GFX942-NEXT: s_mov_b32 s11, s23 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_5_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +define void @s_shuffle_v4p0_v4p0__7_0_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: ; GFX900: ; 
%bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s4 -; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s4 -; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: 
;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_nop 0 +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_6_4() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +define void @s_shuffle_v4p0_v4p0__7_1_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[16:23] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s22 ; GFX900-NEXT: s_mov_b32 s9, s23 -; GFX900-NEXT: s_mov_b32 s10, s22 -; GFX900-NEXT: s_mov_b32 s11, s23 -; GFX900-NEXT: s_mov_b32 s12, s20 -; GFX900-NEXT: s_mov_b32 s13, s21 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: s_mov_b32 s14, s16 ; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART @@ -21772,18 +21298,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_4() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +; GFX90A-LABEL: 
s_shuffle_v4p0_v4p0__7_1_4_4: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[16:23] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s22 ; GFX90A-NEXT: s_mov_b32 s9, s23 -; GFX90A-NEXT: s_mov_b32 s10, s22 -; GFX90A-NEXT: s_mov_b32 s11, s23 -; GFX90A-NEXT: s_mov_b32 s12, s20 -; GFX90A-NEXT: s_mov_b32 s13, s21 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: s_mov_b32 s14, s16 ; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART @@ -21791,65 +21318,1353 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_4() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s0 -; GFX942-NEXT: s_mov_b32 s15, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_2_4_4() { +; GFX900-LABEL: 
s_shuffle_v4p0_v4p0__7_2_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + 
%vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_3_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; 
GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_5_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s6 +; GFX900-NEXT: s_mov_b32 s11, s7 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s6 +; GFX90A-NEXT: s_mov_b32 s11, s7 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; 
GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_6_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s20 +; GFX900-NEXT: s_mov_b32 s11, s21 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s20 +; GFX90A-NEXT: s_mov_b32 s11, s21 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_4_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; 
def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_u_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_0_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +; 
GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x 
ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_1_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_2_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: 
;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_3_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[16:23] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23] +; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23] +; GFX942-NEXT: s_mov_b64 s[12:13], 
s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_5_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s4 +; GFX900-NEXT: s_mov_b32 s15, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s4 +; GFX90A-NEXT: s_mov_b32 s15, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND 
+; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_7_6_4() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[16:23] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s22 +; GFX900-NEXT: s_mov_b32 s9, s23 +; GFX900-NEXT: s_mov_b32 s10, s22 +; GFX900-NEXT: s_mov_b32 s11, s23 +; GFX900-NEXT: s_mov_b32 s12, s20 +; GFX900-NEXT: s_mov_b32 s13, s21 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[16:23] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s22 +; GFX90A-NEXT: s_mov_b32 s9, s23 +; GFX90A-NEXT: s_mov_b32 s10, s22 +; GFX90A-NEXT: s_mov_b32 s11, s23 +; GFX90A-NEXT: s_mov_b32 s12, s20 +; GFX90A-NEXT: s_mov_b32 s13, s21 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; 
GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } define void @s_shuffle_v4p0_v4p0__u_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: 
s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__0_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] +; GFX942-NEXT: 
;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__1_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 
s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__2_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; 
GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__3_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] 
+; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__4_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: 
s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__5_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x 
ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__6_5_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_5_5_5() { +; 
GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s10 +; GFX900-NEXT: s_mov_b32 s13, s11 +; GFX900-NEXT: s_mov_b32 s14, s10 +; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s10 +; GFX90A-NEXT: s_mov_b32 s13, s11 +; GFX90A-NEXT: s_mov_b32 s14, s10 +; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_u_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; 
def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__0_5_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +define void @s_shuffle_v4p0_v4p0__7_0_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; 
def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s14 -; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -21857,17 +22672,19 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s14 -; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -21875,74 +22692,69 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_nop 0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s2 -; GFX942-NEXT: s_mov_b32 s11, s3 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 
s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__1_5_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +define void @s_shuffle_v4p0_v4p0__7_1_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 -; GFX900-NEXT: s_mov_b32 s14, s10 -; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 -; GFX90A-NEXT: 
s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 -; GFX90A-NEXT: s_mov_b32 s14, s10 -; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -21951,253 +22763,150 @@ define void @s_shuffle_v4p0_v4p0__1_5_5_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__2_5_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +define void @s_shuffle_v4p0_v4p0__7_2_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 
s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 -; GFX900-NEXT: s_mov_b32 s14, s10 -; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 -; GFX90A-NEXT: s_mov_b32 s14, s10 -; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; 
GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__3_5_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +define void @s_shuffle_v4p0_v4p0__7_3_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s10 -; GFX900-NEXT: s_mov_b32 s13, s11 -; GFX900-NEXT: s_mov_b32 s14, s10 -; GFX900-NEXT: s_mov_b32 s15, s11 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 
; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s10 -; GFX90A-NEXT: s_mov_b32 s13, s11 -; GFX90A-NEXT: s_mov_b32 s14, s10 -; GFX90A-NEXT: s_mov_b32 s15, s11 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ; def s[12:19] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s10 -; GFX942-NEXT: s_mov_b32 s13, s11 -; GFX942-NEXT: s_mov_b32 s14, s10 -; GFX942-NEXT: s_mov_b32 s15, s11 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__4_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", 
"=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__5_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__6_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__7_5_5_5() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND 
-; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s10 -; GFX9-NEXT: s_mov_b32 s13, s11 -; GFX9-NEXT: s_mov_b32 s14, s10 -; GFX9-NEXT: s_mov_b32 s15, s11 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_u_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +define void @s_shuffle_v4p0_v4p0__7_4_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -22205,6 +22914,8 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s4 +; GFX900-NEXT: s_mov_b32 s11, s5 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: s_mov_b32 s14, s6 @@ -22214,7 +22925,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -22222,6 +22933,8 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s4 +; GFX90A-NEXT: s_mov_b32 s11, s5 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: s_mov_b32 s14, s6 @@ -22231,43 +22944,38 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; 
GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_0_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: +define void @s_shuffle_v4p0_v4p0__7_6_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 ; GFX900-NEXT: s_mov_b32 s12, s14 ; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: ;;#ASMSTART @@ -22275,19 +22983,16 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: +; 
GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 ; GFX90A-NEXT: s_mov_b32 s12, s14 ; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: ;;#ASMSTART @@ -22295,114 +23000,92 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_nop 0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_1_5_5() { -; 
GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: +define void @s_shuffle_v4p0_v4p0__7_7_5_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s6 +; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s6 +; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[8:15] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def 
s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_2_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: +define void @s_shuffle_v4p0_v4p0__7_7_u_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s12 -; GFX900-NEXT: s_mov_b32 s11, s13 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 ; GFX900-NEXT: s_mov_b32 s14, s6 ; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -22410,21 +23093,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; 
GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s12 -; GFX90A-NEXT: s_mov_b32 s11, s13 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 ; GFX90A-NEXT: s_mov_b32 s14, s6 ; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -22432,34 +23108,28 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_3_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: +define void @s_shuffle_v4p0_v4p0__7_7_0_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -22470,14 +23140,16 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; 
GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -22488,174 +23160,176 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ; def s[4:11] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> 
%vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_4_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: +define void @s_shuffle_v4p0_v4p0__7_7_1_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s10, s4 -; GFX900-NEXT: s_mov_b32 s11, s5 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s18 +; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s10, s18 +; GFX900-NEXT: s_mov_b32 s11, s19 ; GFX900-NEXT: s_mov_b32 s12, s6 ; GFX900-NEXT: s_mov_b32 s13, s7 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s10, s4 -; GFX90A-NEXT: s_mov_b32 s11, s5 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s18 +; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s10, s18 +; GFX90A-NEXT: s_mov_b32 s11, s19 ; GFX90A-NEXT: s_mov_b32 s12, s6 ; GFX90A-NEXT: s_mov_b32 s13, s7 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: 
s_shuffle_v4p0_v4p0__7_4_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_6_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: +define void @s_shuffle_v4p0_v4p0__7_7_2_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s14, s6 +; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; 
GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s14, s6 +; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", 
"{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_5_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: +define void @s_shuffle_v4p0_v4p0__7_7_3_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:11] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 ; GFX900-NEXT: s_mov_b32 s14, s6 ; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -22663,16 +23337,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 ; GFX90A-NEXT: s_mov_b32 s14, s6 ; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -22680,33 +23357,31 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; 
GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_u_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: +define void @s_shuffle_v4p0_v4p0__7_7_4_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART @@ -22714,6 +23389,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s10 ; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s12, s4 +; GFX900-NEXT: s_mov_b32 s13, s5 ; GFX900-NEXT: s_mov_b32 s14, s6 ; GFX900-NEXT: s_mov_b32 s15, s7 ; GFX900-NEXT: ;;#ASMSTART @@ -22721,7 +23398,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART @@ -22729,6 +23406,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s10 ; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s12, s4 +; GFX90A-NEXT: s_mov_b32 s13, s5 ; 
GFX90A-NEXT: s_mov_b32 s14, s6 ; GFX90A-NEXT: s_mov_b32 s15, s7 ; GFX90A-NEXT: ;;#ASMSTART @@ -22736,201 +23415,175 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_0_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: +define void @s_shuffle_v4p0_v4p0__7_7_6_5() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 ; GFX900-NEXT: s_mov_b32 s10, s18 ; GFX900-NEXT: s_mov_b32 s11, s19 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 ; GFX900-NEXT: ;;#ASMSTART ; 
GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 ; GFX90A-NEXT: s_mov_b32 s10, s18 ; GFX90A-NEXT: s_mov_b32 s11, s19 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_1_5() { -; 
GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: +define void @s_shuffle_v4p0_v4p0__u_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s18 -; GFX900-NEXT: s_mov_b32 s11, s19 -; GFX900-NEXT: s_mov_b32 s12, s6 -; GFX900-NEXT: s_mov_b32 s13, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s18 -; GFX90A-NEXT: s_mov_b32 s11, s19 -; GFX90A-NEXT: s_mov_b32 s12, s6 -; GFX90A-NEXT: s_mov_b32 s13, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s12, s2 -; 
GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[4:11] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s10 -; GFX942-NEXT: s_mov_b32 s9, s11 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_2_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: +define void @s_shuffle_v4p0_v4p0__0_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: s_mov_b32 s10, s16 +; GFX900-NEXT: s_mov_b32 s11, s17 +; GFX900-NEXT: s_mov_b32 s12, s16 +; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s14, s16 +; GFX900-NEXT: s_mov_b32 s15, s17 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; 
GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: s_mov_b32 s10, s16 +; GFX90A-NEXT: s_mov_b32 s11, s17 +; GFX90A-NEXT: s_mov_b32 s12, s16 +; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s14, s16 +; GFX90A-NEXT: s_mov_b32 s15, s17 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART @@ -22939,311 +23592,271 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_5() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_3_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: +define void @s_shuffle_v4p0_v4p0__1_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ; def s[4:11] 
; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s14 -; GFX900-NEXT: s_mov_b32 s13, s15 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: s_mov_b32 s8, s6 +; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ; def s[4:11] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s14 -; GFX90A-NEXT: s_mov_b32 s13, s15 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: s_mov_b32 s8, s6 +; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[12:19] +; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; 
GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_4_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: +define void @s_shuffle_v4p0_v4p0__2_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] +; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s10 -; GFX900-NEXT: s_mov_b32 s9, s11 -; GFX900-NEXT: s_mov_b32 s12, s4 -; GFX900-NEXT: s_mov_b32 s13, s5 -; GFX900-NEXT: s_mov_b32 s14, s6 -; GFX900-NEXT: s_mov_b32 s15, s7 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s16 +; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] +; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND -; 
GFX90A-NEXT: s_mov_b32 s8, s10 -; GFX90A-NEXT: s_mov_b32 s9, s11 -; GFX90A-NEXT: s_mov_b32 s12, s4 -; GFX90A-NEXT: s_mov_b32 s13, s5 -; GFX90A-NEXT: s_mov_b32 s14, s6 -; GFX90A-NEXT: s_mov_b32 s15, s7 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s16 +; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__7_7_6_5() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: +define void @s_shuffle_v4p0_v4p0__3_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[12:19] ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_mov_b32 s8, s18 ; GFX900-NEXT: s_mov_b32 s9, s19 -; GFX900-NEXT: s_mov_b32 s10, s18 -; GFX900-NEXT: s_mov_b32 s11, s19 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[12:19] ; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_mov_b32 s8, s18 ; GFX90A-NEXT: s_mov_b32 s9, s19 -; GFX90A-NEXT: s_mov_b32 s10, s18 -; GFX90A-NEXT: s_mov_b32 s11, s19 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; 
GFX942-NEXT: s_mov_b32 s14, s2 -; GFX942-NEXT: s_mov_b32 s15, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__u_6_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__0_6_6_6() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: +define void @s_shuffle_v4p0_v4p0__4_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s10, s16 -; GFX900-NEXT: s_mov_b32 s11, s17 -; GFX900-NEXT: s_mov_b32 s12, s16 -; GFX900-NEXT: s_mov_b32 s13, s17 -; GFX900-NEXT: s_mov_b32 s14, s16 -; GFX900-NEXT: s_mov_b32 s15, s17 +; 
GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:15] ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s10, s16 -; GFX90A-NEXT: s_mov_b32 s11, s17 -; GFX90A-NEXT: s_mov_b32 s12, s16 -; GFX90A-NEXT: s_mov_b32 s13, s17 -; GFX90A-NEXT: s_mov_b32 s14, s16 -; GFX90A-NEXT: s_mov_b32 s15, s17 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:15] ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> 
%vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__1_6_6_6() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: +define void @s_shuffle_v4p0_v4p0__5_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[4:11] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s6 -; GFX900-NEXT: s_mov_b32 s9, s7 +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -23253,17 +23866,14 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[4:11] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s6 -; GFX90A-NEXT: s_mov_b32 s9, s7 +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -23273,44 +23883,35 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: 
s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__2_6_6_6() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: +define void @s_shuffle_v4p0_v4p0__6_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s16 -; GFX900-NEXT: s_mov_b32 s9, s17 +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -23320,17 +23921,14 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s16 -; GFX90A-NEXT: s_mov_b32 s9, s17 +; GFX90A-NEXT: s_mov_b32 s8, s12 +; GFX90A-NEXT: s_mov_b32 s9, s13 ; GFX90A-NEXT: 
s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -23340,44 +23938,35 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) ret void } -define void @s_shuffle_v4p0_v4p0__3_6_6_6() { -; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: +define void @s_shuffle_v4p0_v4p0__7_6_6_6() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; def s[12:19] -; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[8:15] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_mov_b32 s8, s18 -; GFX900-NEXT: s_mov_b32 s9, s19 +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 ; GFX900-NEXT: s_mov_b32 s10, s12 ; GFX900-NEXT: s_mov_b32 s11, s13 ; GFX900-NEXT: s_mov_b32 s14, s12 @@ -23387,17 
+23976,14 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: ;;#ASMSTART -; GFX90A-NEXT: ; def s[12:19] -; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[8:15] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: s_mov_b32 s8, s18 -; GFX90A-NEXT: s_mov_b32 s9, s19 +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 ; GFX90A-NEXT: s_mov_b32 s10, s12 ; GFX90A-NEXT: s_mov_b32 s11, s13 ; GFX90A-NEXT: s_mov_b32 s14, s12 @@ -23407,119 +23993,19 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s12 -; GFX942-NEXT: s_mov_b32 s11, s13 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__4_6_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__5_6_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__6_6_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() 
- %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__7_6_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23528,20 +24014,48 @@ define void @s_shuffle_v4p0_v4p0__7_6_6_6() { } define void @s_shuffle_v4p0_v4p0__7_u_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6: +; GFX90A: ; 
%bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23599,12 +24113,9 @@ define void @s_shuffle_v4p0_v4p0__7_0_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23666,12 +24177,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; 
GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23733,12 +24241,9 @@ define void @s_shuffle_v4p0_v4p0__7_2_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23800,12 +24305,9 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s12 -; GFX942-NEXT: s_mov_b32 s15, s13 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23862,14 +24364,10 @@ define void @s_shuffle_v4p0_v4p0__7_4_6_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s4 -; GFX942-NEXT: s_mov_b32 s13, s5 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -23882,20 +24380,48 @@ define void 
@s_shuffle_v4p0_v4p0__7_4_6_6() { } define void @s_shuffle_v4p0_v4p0__7_5_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> 
%vec0, <4 x ptr> %vec1, <4 x i32> @@ -23904,22 +24430,53 @@ define void @s_shuffle_v4p0_v4p0__7_5_6_6() { } define void @s_shuffle_v4p0_v4p0__7_7_6_6() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s14, s12 -; GFX9-NEXT: s_mov_b32 s15, s13 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s14, s12 +; GFX900-NEXT: s_mov_b32 s15, s13 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s14, s12 +; GFX90A-NEXT: s_mov_b32 s15, s13 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; 
GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -23968,12 +24525,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24039,14 +24593,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24112,14 +24662,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 
s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24181,12 +24727,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24252,14 +24795,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s18 -; GFX942-NEXT: s_mov_b32 s9, s19 -; GFX942-NEXT: s_mov_b32 s10, s18 -; GFX942-NEXT: s_mov_b32 s11, s19 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s16 -; GFX942-NEXT: s_mov_b32 s15, s17 +; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19] +; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24312,14 +24851,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 
s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24376,14 +24911,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s4 -; GFX942-NEXT: s_mov_b32 s15, s5 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24396,20 +24927,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() { } define void @s_shuffle_v4p0_v4p0__u_7_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7: +; GFX90A: ; %bb.0: +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24467,12 +25026,9 @@ define void @s_shuffle_v4p0_v4p0__0_7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24534,12 +25090,9 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s2 -; GFX942-NEXT: s_mov_b32 s9, s3 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; 
GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24601,12 +25154,9 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s4 -; GFX942-NEXT: s_mov_b32 s9, s5 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24659,117 +25209,232 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() { ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; -; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7: +; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[0:7] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__4_7_7_7() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; 
GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__5_7_7_7() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s10 +; GFX900-NEXT: s_mov_b32 s9, s11 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s10 +; GFX90A-NEXT: s_mov_b32 s9, s11 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__6_7_7_7() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s12 +; GFX900-NEXT: s_mov_b32 s9, s13 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s12 +; 
GFX90A-NEXT: s_mov_b32 s9, s13 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] + %vec0 = call <4 x ptr> asm "; def $0", "=s"() + %vec1 = call <4 x ptr> asm "; def $0", "=s"() + %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> + call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) + ret void +} + +define void @s_shuffle_v4p0_v4p0__7_u_7_7() { +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[8:15] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; def s[0:7] -; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__4_7_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__5_7_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s10 -; GFX9-NEXT: s_mov_b32 s9, s11 -; GFX9-NEXT: 
s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__6_7_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s12 -; GFX9-NEXT: s_mov_b32 s9, s13 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] - %vec0 = call <4 x ptr> asm "; def $0", "=s"() - %vec1 = call <4 x ptr> asm "; def $0", "=s"() - %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> - call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf) - ret void -} - -define void @s_shuffle_v4p0_v4p0__7_u_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -24827,12 +25492,9 @@ define void @s_shuffle_v4p0_v4p0__7_0_7_7() { ; 
GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24894,12 +25556,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -24961,12 +25620,9 @@ define void @s_shuffle_v4p0_v4p0__7_2_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s4 -; GFX942-NEXT: s_mov_b32 s11, s5 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25028,12 +25684,9 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s14 -; GFX942-NEXT: s_mov_b32 s13, s15 +; GFX942-NEXT: 
s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25090,14 +25743,10 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s0 -; GFX942-NEXT: s_mov_b32 s11, s1 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25110,20 +25759,48 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() { } define void @s_shuffle_v4p0_v4p0__7_5_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25132,22 +25809,53 @@ define void @s_shuffle_v4p0_v4p0__7_5_7_7() { } define void @s_shuffle_v4p0_v4p0__7_6_7_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s12 -; GFX9-NEXT: s_mov_b32 s11, s13 -; GFX9-NEXT: s_mov_b32 s12, s14 -; GFX9-NEXT: s_mov_b32 s13, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s12 +; GFX900-NEXT: s_mov_b32 s11, s13 +; GFX900-NEXT: s_mov_b32 s12, s14 +; GFX900-NEXT: s_mov_b32 s13, s15 +; GFX900-NEXT: 
;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s12 +; GFX90A-NEXT: s_mov_b32 s11, s13 +; GFX90A-NEXT: s_mov_b32 s12, s14 +; GFX90A-NEXT: s_mov_b32 s13, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13] +; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25156,20 +25864,48 @@ define void @s_shuffle_v4p0_v4p0__7_6_7_7() { } define void @s_shuffle_v4p0_v4p0__7_7_u_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def 
s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> @@ -25227,12 +25963,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25294,12 +26027,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() { ; GFX942-NEXT: 
;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25361,12 +26091,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25428,12 +26155,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s14 -; GFX942-NEXT: s_mov_b32 s9, s15 -; GFX942-NEXT: s_mov_b32 s10, s14 -; GFX942-NEXT: s_mov_b32 s11, s15 -; GFX942-NEXT: s_mov_b32 s12, s6 -; GFX942-NEXT: s_mov_b32 s13, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25486,14 +26210,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s0 -; GFX942-NEXT: s_mov_b32 s13, s1 -; GFX942-NEXT: s_mov_b32 s14, 
s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25550,14 +26270,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() { ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:7] ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s8, s6 -; GFX942-NEXT: s_mov_b32 s9, s7 -; GFX942-NEXT: s_mov_b32 s10, s6 -; GFX942-NEXT: s_mov_b32 s11, s7 -; GFX942-NEXT: s_mov_b32 s12, s2 -; GFX942-NEXT: s_mov_b32 s13, s3 -; GFX942-NEXT: s_mov_b32 s14, s6 -; GFX942-NEXT: s_mov_b32 s15, s7 +; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7] +; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3] +; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7] ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[8:15] ; GFX942-NEXT: ;;#ASMEND @@ -25570,20 +26286,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() { } define void @s_shuffle_v4p0_v4p0__7_7_6_7() { -; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; def s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s8, s14 -; GFX9-NEXT: s_mov_b32 s9, s15 -; GFX9-NEXT: s_mov_b32 s10, s14 -; GFX9-NEXT: s_mov_b32 s11, s15 -; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s[8:15] -; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; def s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_mov_b32 s8, s14 +; GFX900-NEXT: s_mov_b32 s9, s15 +; GFX900-NEXT: s_mov_b32 s10, s14 +; GFX900-NEXT: s_mov_b32 s11, s15 +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; use s[8:15] +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; def s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_mov_b32 s8, s14 +; GFX90A-NEXT: s_mov_b32 s9, s15 +; GFX90A-NEXT: s_mov_b32 s10, s14 +; GFX90A-NEXT: s_mov_b32 s11, s15 +; GFX90A-NEXT: ;;#ASMSTART +; GFX90A-NEXT: ; use s[8:15] +; GFX90A-NEXT: ;;#ASMEND +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; def s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15] +; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15] +; GFX942-NEXT: ;;#ASMSTART +; GFX942-NEXT: ; use s[8:15] +; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_setpc_b64 s[30:31] %vec0 = call <4 x ptr> asm "; def $0", "=s"() %vec1 = call <4 x ptr> asm "; def $0", "=s"() %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll index 1e042d3b4a31f..69773bf265e8c 100644 --- a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll +++ b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll @@ -6,25 +6,22 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) %in, ptr addrspace(1) ; GFX942-LABEL: test: ; GFX942: ; %bb.0: ; %entry ; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX942-NEXT: v_mov_b32_e32 v0, 0 -; GFX942-NEXT: v_mov_b32_e32 v2, v0 -; GFX942-NEXT: v_mov_b32_e32 v3, v0 -; GFX942-NEXT: v_mov_b32_e32 v1, v0 +; GFX942-NEXT: v_mov_b64_e32 v[4:5], 0 +; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX942-NEXT: v_mov_b32_e32 v10, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3] -; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1] ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_mov_b32_e32 
v12, s4 -; GFX942-NEXT: v_mov_b32_e32 v13, s5 -; GFX942-NEXT: v_mov_b32_e32 v4, s6 -; GFX942-NEXT: v_mov_b32_e32 v5, s7 -; GFX942-NEXT: v_mov_b32_e32 v6, s7 -; GFX942-NEXT: v_mov_b32_e32 v7, s7 +; GFX942-NEXT: v_mov_b32_e32 v8, s4 +; GFX942-NEXT: v_mov_b32_e32 v9, s5 +; GFX942-NEXT: v_mov_b32_e32 v0, s6 +; GFX942-NEXT: v_mov_b32_e32 v1, s7 +; GFX942-NEXT: v_mov_b32_e32 v2, s7 +; GFX942-NEXT: v_mov_b32_e32 v3, s7 ; GFX942-NEXT: s_nop 1 -; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[8:11], v[12:13], v[4:7], v13 +; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[4:7], v[8:9], v[0:3], v9 ; GFX942-NEXT: s_nop 6 -; GFX942-NEXT: global_store_dword v0, v11, s[2:3] offset:12 +; GFX942-NEXT: global_store_dword v10, v7, s[2:3] offset:12 ; GFX942-NEXT: s_endpgm entry: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 0 diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index b31cc36a5f7c6..1d50d96f43f58 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -263,24 +263,24 @@ define amdgpu_kernel void @s_uint_to_fp_v4i64_to_v4f64(ptr addrspace(1) %out, <4 ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX942-NEXT: v_mov_b32_e32 v10, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s11 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s10 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s9 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s15 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s14 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s13 ; GFX942-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32 ; GFX942-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] ; GFX942-NEXT: v_ldexp_f64 v[0:1], v[4:5], 32 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s8 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s12 ; GFX942-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] -; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s15 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s11 ; GFX942-NEXT: v_ldexp_f64 v[4:5], v[4:5], 32 -; GFX942-NEXT: 
v_cvt_f64_u32_e32 v[6:7], s14 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s10 ; GFX942-NEXT: v_add_f64 v[6:7], v[4:5], v[6:7] -; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s13 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s9 ; GFX942-NEXT: v_ldexp_f64 v[4:5], v[4:5], 32 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[8:9], s12 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[8:9], s8 ; GFX942-NEXT: v_add_f64 v[4:5], v[4:5], v[8:9] -; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 -; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] +; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 +; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1] ; GFX942-NEXT: s_endpgm %cast = uitofp <4 x i64> %in to <4 x double> store <4 x double> %cast, ptr addrspace(1) %out, align 16 @@ -412,12 +412,12 @@ define amdgpu_kernel void @s_uint_to_fp_v4i32_to_v4f64(ptr addrspace(1) %out, <4 ; GFX942-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GFX942-NEXT: v_mov_b32_e32 v8, 0 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) -; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s3 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s2 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s1 -; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] offset:16 -; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7] +; GFX942-NEXT: v_cvt_f64_u32_e32 v[2:3], s3 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[0:1], s2 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[6:7], s1 +; GFX942-NEXT: v_cvt_f64_u32_e32 v[4:5], s0 +; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[6:7] offset:16 +; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] ; GFX942-NEXT: s_endpgm %cast = uitofp <4 x i32> %in to <4 x double> store <4 x double> %cast, ptr addrspace(1) %out, align 16 diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll index b045c761436de..a81c7702e02d7 100644 --- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll +++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll @@ -459,11 
+459,10 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa ; GFX942-NEXT: ; %bb.1: ; %bb.1 ; GFX942-NEXT: global_load_dwordx2 v[4:5], v1, s[10:11] ; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0 -; GFX942-NEXT: s_waitcnt vmcnt(1) -; GFX942-NEXT: v_mov_b32_e32 v2, 0 ; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], exec ; GFX942-NEXT: s_and_b64 s[4:5], vcc, exec -; GFX942-NEXT: v_mov_b32_e32 v3, v2 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_mov_b64_e32 v[2:3], 0 ; GFX942-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] ; GFX942-NEXT: .LBB9_2: ; %Flow ; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]